diff --git a/src/acl/RegexData.cc b/src/acl/RegexData.cc index 01a4c12..b5c1679 100644 --- a/src/acl/RegexData.cc +++ b/src/acl/RegexData.cc @@ -22,6 +22,7 @@ #include "ConfigParser.h" #include "Debug.h" #include "sbuf/List.h" +#include "sbuf/Algorithms.h" ACLRegexData::~ACLRegexData() { @@ -129,6 +130,18 @@ compileRE(std::list &curlist, const char * RE, int flags) return true; } +static bool +compileRE(std::list &curlist, const SBufList &RE, int flags) +{ + if (RE.empty()) + return curlist.empty(); // XXX: old code did this. It looks wrong. + SBuf regexp; + static const SBuf openparen("("), closeparen(")"), separator(")|("); + JoinContainerIntoSBuf(regexp, RE.begin(), RE.end(), separator, openparen, + closeparen); + return compileRE(curlist, regexp.c_str(), flags); +} + /** Compose and compile one large RE from a set of (small) REs. * The ultimate goal is to have only one RE per ACL so that match() is * called only once per ACL. @@ -137,16 +150,11 @@ static int compileOptimisedREs(std::list &curlist, const SBufList &sl) { std::list newlist; - int numREs = 0; + SBufList accumulatedRE; + int numREs = 0, reSize = 0; int flags = REG_EXTENDED | REG_NOSUB; - int largeREindex = 0; - char largeRE[BUFSIZ]; - *largeRE = 0; for (const SBuf & configurationLineWord : sl) { - int RElen; - RElen = configurationLineWord.length(); - static const SBuf minus_i("-i"); static const SBuf plus_i("+i"); if (configurationLineWord == minus_i) { @@ -155,10 +163,11 @@ compileOptimisedREs(std::list &curlist, const SBufList &sl) debugs(28, 2, "optimisation of -i ... -i" ); } else { debugs(28, 2, "-i" ); - if (!compileRE(newlist, largeRE, flags)) + if (!compileRE(newlist, accumulatedRE, flags)) return 0; flags |= REG_ICASE; - largeRE[largeREindex=0] = '\0'; + accumulatedRE.clear(); + reSize = 0; } } else if (configurationLineWord == plus_i) { if ((flags & REG_ICASE) == 0) { @@ -166,37 +175,34 @@ compileOptimisedREs(std::list &curlist, const SBufList &sl) debugs(28, 2, "optimisation of +i ... 
+i"); } else { debugs(28, 2, "+i"); - if (!compileRE(newlist, largeRE, flags)) + if (!compileRE(newlist, accumulatedRE, flags)) return 0; flags &= ~REG_ICASE; - largeRE[largeREindex=0] = '\0'; + accumulatedRE.clear(); + reSize = 0; } - } else if (RElen + largeREindex + 3 < BUFSIZ-1) { + } else if (reSize < 1024) { debugs(28, 2, "adding RE '" << configurationLineWord << "'"); - if (largeREindex > 0) { - largeRE[largeREindex] = '|'; - ++largeREindex; - } - largeRE[largeREindex] = '('; - ++largeREindex; - configurationLineWord.copy(largeRE+largeREindex, BUFSIZ-largeREindex); - largeREindex += configurationLineWord.length(); - largeRE[largeREindex] = ')'; - ++largeREindex; - largeRE[largeREindex] = '\0'; + accumulatedRE.push_back(configurationLineWord); ++numREs; + reSize += configurationLineWord.length(); } else { debugs(28, 2, "buffer full, generating new optimised RE..." ); - if (!compileRE(newlist, largeRE, flags)) + accumulatedRE.push_back(configurationLineWord); + if (!compileRE(newlist, accumulatedRE, flags)) return 0; - largeRE[largeREindex=0] = '\0'; + accumulatedRE.clear(); + reSize = 0; continue; /* do the loop again to add the RE to largeRE */ } } - if (!compileRE(newlist, largeRE, flags)) + if (!compileRE(newlist, accumulatedRE, flags)) return 0; + accumulatedRE.clear(); + reSize = 0; + /* all was successful, so put the new list at the tail */ curlist.splice(curlist.end(), newlist); diff --git a/src/sbuf/Algorithms.h b/src/sbuf/Algorithms.h index 21ee889..338e9c0 100644 --- a/src/sbuf/Algorithms.h +++ b/src/sbuf/Algorithms.h @@ -81,6 +81,57 @@ SBufContainerJoin(const Container &items, const SBuf& separator) return rv; } +/** Join container of SBufs and append to supplied target + * + * append to the target SBuf all elements in the [begin,end) range from + * an iterable container, prefixed by prefix, separated by separator and + * followed by suffix. 
Prefix and suffix are also appended when the range is + * empty + * + * \return the modified dest + */ +template <class ContainerIterator> +SBuf& +JoinContainerIntoSBuf(SBuf &dest, const ContainerIterator &begin, + const ContainerIterator &end, const SBuf& separator, + const SBuf& prefix = SBuf(), const SBuf& suffix = SBuf()) +{ + if (begin == end) { + dest.append(prefix).append(suffix); + return dest; + } + + // optimization: pre-calculate needed storage + const SBuf::size_type totalContainerSize = + std::accumulate(begin, end, 0, SBufAddLength(separator)) + + dest.length() + prefix.length() + suffix.length(); + SBufReservationRequirements req; + req.minSpace = totalContainerSize; + dest.reserve(req); + + auto i = begin; + dest.append(prefix); + dest.append(*i); + ++i; + for (; i != end; ++i) + dest.append(separator).append(*i); + dest.append(suffix); + return dest; +} + + +/// convenience wrapper of JoinContainerIntoSBuf with no caller-supplied SBuf +template <class ContainerIterator> +SBuf +JoinContainerToSBuf(const ContainerIterator &begin, + const ContainerIterator &end, const SBuf& separator, + const SBuf& prefix = SBuf(), const SBuf& suffix = SBuf()) +{ + SBuf rv; + return JoinContainerIntoSBuf(rv, begin, end, separator, prefix, suffix); +} + + namespace std { /// default hash functor to support std::unordered_map template <>