diff --git a/SOURCES/0001-Fix-incremental-parsing-of-chunked-quoted-extensions.patch b/SOURCES/0001-Fix-incremental-parsing-of-chunked-quoted-extensions.patch
new file mode 100644
index 0000000..7a65628
--- /dev/null
+++ b/SOURCES/0001-Fix-incremental-parsing-of-chunked-quoted-extensions.patch
@@ -0,0 +1,1190 @@
+From 96d95b036e28c863c810b334f17d0ec619bf421c Mon Sep 17 00:00:00 2001
+From: Eduard Bagdasaryan
+Date: Sun, 5 Nov 2023 11:20:35 +0000
+Subject: [PATCH 1/2] Fix incremental parsing of chunked quoted extensions
+ (#310)
+
+Before this change, incremental parsing of quoted chunked extensions
+was broken for two reasons:
+
+* Http::One::Parser::skipLineTerminator() unexpectedly threw after
+  partially received quoted chunk extension value.
+
+* When Http::One::Tokenizer was unable to parse a quoted extension,
+  it incorrectly restored the input buffer to the beginning of the
+  extension value (instead of the extension itself), thus making
+  further incremental parsing iterations impossible.
+
+IMO, the reason for this problem was that Http::One::Tokenizer::qdText()
+could not distinguish two cases (returning false in both):
+
+* the end of the quoted string not yet reached
+
+* an input error, e.g., wrong/unexpected character
+
+A possible approach could be to improve Http::One::Tokenizer, making it
+aware of the "needs more data" state. However, to be acceptable,
+these improvements should be done in the base Parser::Tokenizer
+class instead. These changes seem to be non-trivial and could be
+done separately and later.
+
+Another approach, used here, is to simplify the complex and error-prone
+chunked extensions parsing algorithm, fixing incremental parsing bugs
+while still parsing incrementally in almost all cases. A performance
+regression is expected only in relatively rare cases of partially
+received or malformed extensions.
+ +Also: +* fixed parsing of partial use-original-body extension values +* do not treat an invalid use-original-body as an unknown extension +* optimization: parse use-original-body extension only in ICAP context + (i.e., where it is expected) +* improvement: added a new API to TeChunkedParser to specify known + chunked extensions list + +Modified-by: Alex Burmashev +Signed-off-by: Alex Burmashev +--- + src/adaptation/icap/ModXact.cc | 22 ++++- + src/adaptation/icap/ModXact.h | 20 +++++ + src/http/one/Parser.cc | 35 ++++---- + src/http/one/Parser.h | 10 ++- + src/http/one/RequestParser.cc | 16 ++-- + src/http/one/RequestParser.h | 8 +- + src/http/one/ResponseParser.cc | 17 ++-- + src/http/one/ResponseParser.h | 2 +- + src/http/one/TeChunkedParser.cc | 139 ++++++++++++++++++-------------- + src/http/one/TeChunkedParser.h | 41 ++++++++-- + src/http/one/Tokenizer.cc | 104 ++++++++++++------------ + src/http/one/Tokenizer.h | 89 ++++++++------------ + src/http/one/forward.h | 3 + + src/parser/BinaryTokenizer.h | 3 +- + src/parser/Makefile.am | 1 + + src/parser/Tokenizer.cc | 40 +++++++++ + src/parser/Tokenizer.h | 13 +++ + src/parser/forward.h | 22 +++++ + 18 files changed, 364 insertions(+), 221 deletions(-) + create mode 100644 src/parser/forward.h + +diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc +index 2db0a68..22a87f5 100644 +--- a/src/adaptation/icap/ModXact.cc ++++ b/src/adaptation/icap/ModXact.cc +@@ -25,12 +25,13 @@ + #include "comm.h" + #include "comm/Connection.h" + #include "err_detail_type.h" +-#include "http/one/TeChunkedParser.h" + #include "HttpHeaderTools.h" + #include "HttpMsg.h" + #include "HttpReply.h" + #include "HttpRequest.h" + #include "MasterXaction.h" ++#include "parser/Tokenizer.h" ++#include "sbuf/Stream.h" + #include "SquidTime.h" + + // flow and terminology: +@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXactLauncher); + + static const size_t TheBackupLimit = BodyPipe::MaxCapacity; + ++const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body"); ++ + Adaptation::Icap::ModXact::State::State() + { + memset(this, 0, sizeof(*this)); +@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOnParsingBody() + state.parsing = State::psBody; + replyHttpBodySize = 0; + bodyParser = new Http1::TeChunkedParser; ++ bodyParser->parseExtensionValuesWith(&extensionParser); + makeAdaptedBodyPipe("adapted response from the ICAP server"); + Must(state.sending == State::sendingAdapted); + } else { +@@ -1142,9 +1146,8 @@ void Adaptation::Icap::ModXact::parseBody() + } + + if (parsed) { +- if (state.readyForUob && bodyParser->useOriginBody >= 0) { +- prepPartialBodyEchoing( +- static_cast(bodyParser->useOriginBody)); ++ if (state.readyForUob && extensionParser.sawUseOriginalBody()) { ++ prepPartialBodyEchoing(extensionParser.useOriginalBody()); + stopParsing(); + return; + } +@@ -2014,3 +2017,14 @@ void Adaptation::Icap::ModXactLauncher::updateHistory(bool doStart) + } + } + ++void ++Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName) ++{ ++ if (extName == UseOriginalBodyName) { ++ useOriginalBody_ = tok.udec64("use-original-body"); ++ assert(useOriginalBody_ >= 0); ++ } else { ++ Ignore(tok, extName); ++ } ++} ++ +diff --git a/src/adaptation/icap/ModXact.h b/src/adaptation/icap/ModXact.h +index f7afa69..fb4dec0 100644 +--- a/src/adaptation/icap/ModXact.h ++++ b/src/adaptation/icap/ModXact.h +@@ -15,6 +15,7 @@ + #include "adaptation/icap/Xaction.h" + #include 
"BodyPipe.h" + #include "http/one/forward.h" ++#include "http/one/TeChunkedParser.h" + + /* + * ICAPModXact implements ICAP REQMOD and RESPMOD transaction using +@@ -105,6 +106,23 @@ private: + enum State { stDisabled, stWriting, stIeof, stDone } theState; + }; + ++/// handles ICAP-specific chunk extensions supported by Squid ++class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser ++{ ++public: ++ /* Http1::ChunkExtensionValueParser API */ ++ virtual void parse(Tokenizer &tok, const SBuf &extName) override; ++ ++ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; } ++ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast(useOriginalBody_); } ++ ++private: ++ static const SBuf UseOriginalBodyName; ++ ++ /// the value of the parsed use-original-body chunk extension (or -1) ++ int64_t useOriginalBody_ = -1; ++}; ++ + class ModXact: public Xaction, public BodyProducer, public BodyConsumer + { + CBDATA_CLASS(ModXact); +@@ -270,6 +288,8 @@ private: + + int adaptHistoryId; ///< adaptation history slot reservation + ++ ChunkExtensionValueParser extensionParser; ++ + class State + { + +diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc +index 0c86733..affe0b1 100644 +--- a/src/http/one/Parser.cc ++++ b/src/http/one/Parser.cc +@@ -7,10 +7,11 @@ + */ + + #include "squid.h" ++#include "base/CharacterSet.h" + #include "Debug.h" + #include "http/one/Parser.h" +-#include "http/one/Tokenizer.h" + #include "mime_header.h" ++#include "parser/Tokenizer.h" + #include "SquidConfig.h" + + /// RFC 7230 section 2.6 - 7 magic octets +@@ -61,20 +62,19 @@ Http::One::Parser::DelimiterCharacters() + RelaxedDelimiterCharacters() : CharacterSet::SP; + } + +-bool +-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const ++void ++Http::One::Parser::skipLineTerminator(Tokenizer &tok) const + { + if (tok.skip(Http1::CrLf())) +- return true; ++ return; + + if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) +- return true; ++ return; + + if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) +- return false; // need more data ++ throw InsufficientInput(); + + throw TexcHere("garbage instead of CRLF line terminator"); +- return false; // unreachable, but make naive compilers happy + } + + /// all characters except the LF line terminator +@@ -102,7 +102,7 @@ LineCharacters() + void + Http::One::Parser::cleanMimePrefix() + { +- Http1::Tokenizer tok(mimeHeaderBlock_); ++ Tokenizer tok(mimeHeaderBlock_); + while (tok.skipOne(RelaxedDelimiterCharacters())) { + (void)tok.skipAll(LineCharacters()); // optional line content + // LF terminator is required. +@@ -137,7 +137,7 @@ Http::One::Parser::cleanMimePrefix() + void + Http::One::Parser::unfoldMime() + { +- Http1::Tokenizer tok(mimeHeaderBlock_); ++ Tokenizer tok(mimeHeaderBlock_); + const auto szLimit = mimeHeaderBlock_.length(); + mimeHeaderBlock_.clear(); + // prevent the mime sender being able to make append() realloc/grow multiple times. 
+@@ -228,7 +228,7 @@ Http::One::Parser::getHostHeaderField() + debugs(25, 5, "looking for " << name); + + // while we can find more LF in the SBuf +- Http1::Tokenizer tok(mimeHeaderBlock_); ++ Tokenizer tok(mimeHeaderBlock_); + SBuf p; + + while (tok.prefix(p, LineCharacters())) { +@@ -250,7 +250,7 @@ Http::One::Parser::getHostHeaderField() + p.consume(namelen + 1); + + // TODO: optimize SBuf::trim to take CharacterSet directly +- Http1::Tokenizer t(p); ++ Tokenizer t(p); + t.skipAll(CharacterSet::WSP); + p = t.remaining(); + +@@ -278,10 +278,15 @@ Http::One::ErrorLevel() + } + + // BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule +-bool +-Http::One::ParseBws(Tokenizer &tok) ++void ++Http::One::ParseBws(Parser::Tokenizer &tok) + { +- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) { ++ const auto count = tok.skipAll(Parser::WhitespaceCharacters()); ++ ++ if (tok.atEnd()) ++ throw InsufficientInput(); // even if count is positive ++ ++ if (count) { + // Generating BWS is a MUST-level violation so warn about it as needed. + debugs(33, ErrorLevel(), "found " << count << " BWS octets"); + // RFC 7230 says we MUST parse BWS, so we fall through even if +@@ -289,6 +294,6 @@ Http::One::ParseBws(Tokenizer &tok) + } + // else we successfully "parsed" an empty BWS sequence + +- return true; ++ // success: no more BWS characters expected + } + +diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h +index 58a5cae..40e281b 100644 +--- a/src/http/one/Parser.h ++++ b/src/http/one/Parser.h +@@ -12,6 +12,7 @@ + #include "anyp/ProtocolVersion.h" + #include "http/one/forward.h" + #include "http/StatusCode.h" ++#include "parser/forward.h" + #include "sbuf/SBuf.h" + + namespace Http { +@@ -40,6 +41,7 @@ class Parser : public RefCountable + { + public: + typedef SBuf::size_type size_type; ++ typedef ::Parser::Tokenizer Tokenizer; + + Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {} + virtual ~Parser() {} +@@ -118,11 +120,11 @@ protected: + * detect and skip the CRLF or (if tolerant) LF line terminator + * consume from the tokenizer. + * +- * throws if non-terminator is detected. ++ * \throws exception on bad or InsuffientInput. + * \retval true only if line terminator found. + * \retval false incomplete or missing line terminator, need more data. + */ +- bool skipLineTerminator(Http1::Tokenizer &tok) const; ++ void skipLineTerminator(Tokenizer &) const; + + /** + * Scan to find the mime headers block for current message. 
+@@ -159,8 +161,8 @@ private: + }; + + /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace) +-/// \returns true (always; unlike all the skip*() functions) +-bool ParseBws(Tokenizer &tok); ++/// \throws InsufficientInput when the end of BWS cannot be confirmed ++void ParseBws(Parser::Tokenizer &); + + /// the right debugs() level for logging HTTP violation messages + int ErrorLevel(); +diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc +index a325f7d..0f13c92 100644 +--- a/src/http/one/RequestParser.cc ++++ b/src/http/one/RequestParser.cc +@@ -9,8 +9,8 @@ + #include "squid.h" + #include "Debug.h" + #include "http/one/RequestParser.h" +-#include "http/one/Tokenizer.h" + #include "http/ProtocolVersion.h" ++#include "parser/Tokenizer.h" + #include "profiler/Profiler.h" + #include "SquidConfig.h" + +@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLines() + * RFC 7230 section 2.6, 3.1 and 3.5 + */ + bool +-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok) ++Http::One::RequestParser::parseMethodField(Tokenizer &tok) + { + // method field is a sequence of TCHAR. + // Limit to 32 characters to prevent overly long sequences of non-HTTP +@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetCharacters() + } + + bool +-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok) ++Http::One::RequestParser::parseUriField(Tokenizer &tok) + { + /* Arbitrary 64KB URI upper length limit. + * +@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok) + } + + bool +-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok) ++Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok) + { + static const SBuf http1p0("HTTP/1.0"); + static const SBuf http1p1("HTTP/1.1"); +@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(const size_t count, const char *where) + + /// Parse CRs at the end of request-line, just before the terminating LF. + bool +-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok) ++Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok) + { + if (Config.onoff.relaxed_header_parser) { + (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK +@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFirstLine() + // Earlier, skipGarbageLines() took care of any leading LFs (if allowed). + // Now, the request line has to end at the first LF. + static const CharacterSet lineChars = CharacterSet::LF.complement("notLF"); +- ::Parser::Tokenizer lineTok(buf_); ++ Tokenizer lineTok(buf_); + if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) { + if (buf_.length() >= Config.maxRequestHeaderSize) { + /* who should we blame for our failure to parse this line? 
*/ + +- Http1::Tokenizer methodTok(buf_); ++ Tokenizer methodTok(buf_); + if (!parseMethodField(methodTok)) + return -1; // blame a bad method (or its delimiter) + +@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFirstLine() + return 0; + } + +- Http1::Tokenizer tok(line); ++ Tokenizer tok(line); + + if (!parseMethodField(tok)) + return -1; +diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h +index 7086548..26697cd 100644 +--- a/src/http/one/RequestParser.h ++++ b/src/http/one/RequestParser.h +@@ -54,11 +54,11 @@ private: + bool doParse(const SBuf &aBuf); + + /* all these return false and set parseStatusCode on parsing failures */ +- bool parseMethodField(Http1::Tokenizer &); +- bool parseUriField(Http1::Tokenizer &); +- bool parseHttpVersionField(Http1::Tokenizer &); ++ bool parseMethodField(Tokenizer &); ++ bool parseUriField(Tokenizer &); ++ bool parseHttpVersionField(Tokenizer &); + bool skipDelimiter(const size_t count, const char *where); +- bool skipTrailingCrs(Http1::Tokenizer &tok); ++ bool skipTrailingCrs(Tokenizer &tok); + + bool http0() const {return !msgProtocol_.major;} + static const CharacterSet &RequestTargetCharacters(); +diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc +index 24af849..65baf09 100644 +--- a/src/http/one/ResponseParser.cc ++++ b/src/http/one/ResponseParser.cc +@@ -9,8 +9,8 @@ + #include "squid.h" + #include "Debug.h" + #include "http/one/ResponseParser.h" +-#include "http/one/Tokenizer.h" + #include "http/ProtocolVersion.h" ++#include "parser/Tokenizer.h" + #include "profiler/Profiler.h" + #include "SquidConfig.h" + +@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize() const + // NP: we found the protocol version and consumed it already. + // just need the status code and reason phrase + int +-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim) ++Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim) + { + if (!completedStatus_) { + debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "..."); +@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c + static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT; + (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing + try { +- if (skipLineTerminator(tok)) { +- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); +- buf_ = tok.remaining(); // resume checkpoint +- return 1; +- } ++ skipLineTerminator(tok); ++ buf_ = tok.remaining(); // resume checkpoint ++ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length())); ++ return 1; ++ } catch (const InsufficientInput &) { + reasonPhrase_.clear(); + return 0; // need more to be sure we have it all +- + } catch (const std::exception &ex) { + debugs(74, 6, "invalid status-line: " << ex.what()); + } +@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c + int + Http::One::ResponseParser::parseResponseFirstLine() + { +- Http1::Tokenizer tok(buf_); ++ Tokenizer tok(buf_); + + const CharacterSet &WspDelim = DelimiterCharacters(); + +diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h +index 15db4a0..cf13b4d 100644 +--- a/src/http/one/ResponseParser.h ++++ b/src/http/one/ResponseParser.h +@@ -43,7 +43,7 @@ public: + + 
private: + int parseResponseFirstLine(); +- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &); ++ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &); + + /// magic prefix for identifying ICY response messages + static const SBuf IcyMagic; +diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc +index 754086e..6d2f8ea 100644 +--- a/src/http/one/TeChunkedParser.cc ++++ b/src/http/one/TeChunkedParser.cc +@@ -13,10 +13,13 @@ + #include "http/one/Tokenizer.h" + #include "http/ProtocolVersion.h" + #include "MemBuf.h" ++#include "parser/Tokenizer.h" + #include "Parsing.h" ++#include "sbuf/Stream.h" + #include "SquidConfig.h" + +-Http::One::TeChunkedParser::TeChunkedParser() ++Http::One::TeChunkedParser::TeChunkedParser(): ++ customExtensionValueParser(nullptr) + { + // chunked encoding only exists in HTTP/1.1 + Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1); +@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear() + buf_.clear(); + theChunkSize = theLeftBodySize = 0; + theOut = NULL; +- useOriginBody = -1; ++ // XXX: We do not reset customExtensionValueParser here. Based on the ++ // clear() API description, we must, but it makes little sense and could ++ // break method callers if they appear because some of them may forget to ++ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our ++ // parent class and this unnecessary method with it. + } + + bool +@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const SBuf &aBuf) + if (parsingStage_ == Http1::HTTP_PARSE_NONE) + parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ; + +- Http1::Tokenizer tok(buf_); ++ Tokenizer tok(buf_); + + // loop for as many chunks as we can + // use do-while instead of while so that we can incrementally + // restart in the middle of a chunk/frame + do { + +- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize)) ++ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok)) + return false; + + if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok)) +@@ -80,7 +87,7 @@ Http::One::TeChunkedParser::needsMoreSpace() const + + /// RFC 7230 section 4.1 chunk-size + bool +-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok) ++Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok) + { + Must(theChunkSize <= 0); // Should(), really + +@@ -104,66 +111,75 @@ Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok) + return false; // should not be reachable + } + +-/** +- * Parses chunk metadata suffix, looking for interesting extensions and/or +- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667: +- * +- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) +- * chunk-ext-name = token +- * chunk-ext-val = token / quoted-string +- * +- * ICAP 'use-original-body=N' extension is supported. 
+- */ ++/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1: ++/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF ++/// last-chunk = 1*"0" [ chunk-ext ] CRLF + bool +-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown) ++Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok) + { +- SBuf ext; +- SBuf value; +- while ( +- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size +- tok.skip(';') && +- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name +- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name +- +- // whole value part is optional. if no '=' expect next chunk-ext +- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) { +- +- if (!skipKnown) { +- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) { +- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody); +- buf_ = tok.remaining(); // parse checkpoint +- continue; +- } +- } +- +- debugs(94, 5, "skipping unknown chunk extension " << ext); +- +- // unknown might have a value token or quoted-string +- if (tok.quotedStringOrToken(value) && !tok.atEnd()) { +- buf_ = tok.remaining(); // parse checkpoint +- continue; +- } +- +- // otherwise need more data OR corrupt syntax +- break; +- } +- +- if (!tok.atEnd()) +- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name) +- } +- +- if (skipLineTerminator(tok)) { +- buf_ = tok.remaining(); // checkpoint +- // non-0 chunk means data, 0-size means optional Trailer follows ++ // Code becomes much simpler when incremental parsing functions throw on ++ // bad or insufficient input, like in the code below. TODO: Expand up. ++ try { ++ parseChunkExtensions(tok); // a possibly empty chunk-ext list ++ skipLineTerminator(tok); ++ buf_ = tok.remaining(); + parsingStage_ = theChunkSize ? 
Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME; + return true; ++ } catch (const InsufficientInput &) { ++ tok.reset(buf_); // backtrack to the last commit point ++ return false; + } ++ // other exceptions bubble up to kill message parsing ++} + +- return false; ++/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667): ++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) ++void ++Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok) ++{ ++ do { ++ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size ++ ++ if (!tok.skip(';')) ++ return; // reached the end of extensions (if any) ++ ++ parseOneChunkExtension(tok); ++ buf_ = tok.remaining(); // got one extension ++ } while (true); ++} ++ ++void ++Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName) ++{ ++ const auto ignoredValue = tokenOrQuotedString(tok); ++ debugs(94, 5, extName << " with value " << ignoredValue); ++} ++ ++/// Parses a single chunk-ext list element: ++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) ++void ++Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok) ++{ ++ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name ++ ++ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR); ++ ++ ParseBws(tok); ++ ++ if (!tok.skip('=')) ++ return; // parsed a valueless chunk-ext ++ ++ ParseBws(tok); ++ ++ // optimization: the only currently supported extension needs last-chunk ++ if (!theChunkSize && customExtensionValueParser) ++ customExtensionValueParser->parse(tok, extName); ++ else ++ ChunkExtensionValueParser::Ignore(tok, extName); + } + + bool +-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok) ++Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok) + { + if (theLeftBodySize > 0) { + buf_ = tok.remaining(); // sync buffers before buf_ use +@@ -188,17 +204,20 @@ Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok) + } + + bool +-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok) ++Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok) + { + Must(theLeftBodySize == 0); // Should(), really + +- if (skipLineTerminator(tok)) { ++ try { ++ skipLineTerminator(tok); + buf_ = tok.remaining(); // parse checkpoint + theChunkSize = 0; // done with the current chunk + parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ; + return true; + } +- +- return false; ++ catch (const InsufficientInput &) { ++ return false; ++ } ++ // other exceptions bubble up to kill message parsing + } + +diff --git a/src/http/one/TeChunkedParser.h b/src/http/one/TeChunkedParser.h +index 1b0319e..2ca8988 100644 +--- a/src/http/one/TeChunkedParser.h ++++ b/src/http/one/TeChunkedParser.h +@@ -18,6 +18,26 @@ namespace Http + namespace One + { + ++using ::Parser::InsufficientInput; ++ ++// TODO: Move this class into http/one/ChunkExtensionValueParser.* ++/// A customizable parser of a single chunk extension value (chunk-ext-val). 
++/// From RFC 7230 section 4.1.1 and its Errata #4667: ++/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) ++/// chunk-ext-name = token ++/// chunk-ext-val = token / quoted-string ++class ChunkExtensionValueParser ++{ ++public: ++ typedef ::Parser::Tokenizer Tokenizer; ++ ++ /// extracts and ignores the value of a named extension ++ static void Ignore(Tokenizer &tok, const SBuf &extName); ++ ++ /// extracts and then interprets (or ignores) the extension value ++ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0; ++}; ++ + /** + * An incremental parser for chunked transfer coding + * defined in RFC 7230 section 4.1. +@@ -25,7 +45,7 @@ namespace One + * + * The parser shovels content bytes from the raw + * input buffer into the content output buffer, both caller-supplied. +- * Ignores chunk extensions except for ICAP's ieof. ++ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith(). + * Trailers are available via mimeHeader() if wanted. + */ + class TeChunkedParser : public Http1::Parser +@@ -37,6 +57,10 @@ public: + /// set the buffer to be used to store decoded chunk data + void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;} + ++ /// Instead of ignoring all chunk extension values, give the supplied ++ /// parser a chance to handle them. Only applied to last-chunk (for now). ++ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; } ++ + bool needsMoreSpace() const; + + /* Http1::Parser API */ +@@ -45,17 +69,20 @@ public: + virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks + + private: +- bool parseChunkSize(Http1::Tokenizer &tok); +- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown); +- bool parseChunkBody(Http1::Tokenizer &tok); +- bool parseChunkEnd(Http1::Tokenizer &tok); ++ bool parseChunkSize(Tokenizer &tok); ++ bool parseChunkMetadataSuffix(Tokenizer &); ++ void parseChunkExtensions(Tokenizer &); ++ void parseOneChunkExtension(Tokenizer &); ++ bool parseChunkBody(Tokenizer &tok); ++ bool parseChunkEnd(Tokenizer &tok); + + MemBuf *theOut; + uint64_t theChunkSize; + uint64_t theLeftBodySize; + +-public: +- int64_t useOriginBody; ++ /// An optional plugin for parsing and interpreting custom chunk-ext-val. ++ /// This "visitor" object is owned by our creator. ++ ChunkExtensionValueParser *customExtensionValueParser; + }; + + } // namespace One +diff --git a/src/http/one/Tokenizer.cc b/src/http/one/Tokenizer.cc +index 804b8e1..3a6bef3 100644 +--- a/src/http/one/Tokenizer.cc ++++ b/src/http/one/Tokenizer.cc +@@ -8,35 +8,18 @@ + + #include "squid.h" + #include "Debug.h" ++#include "http/one/Parser.h" + #include "http/one/Tokenizer.h" +- +-bool +-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0) +-{ +- checkpoint(); +- +- if (!skip('"')) +- return false; +- +- return qdText(returnedToken, http1p0); +-} +- +-bool +-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0) ++#include "parser/Tokenizer.h" ++#include "sbuf/Stream.h" ++ ++/// Extracts quoted-string after the caller removes the initial '"'. 
++/// \param http1p0 whether to prohibit \-escaped characters in quoted strings ++/// \throws InsufficientInput when input can be a token _prefix_ ++/// \returns extracted quoted string (without quotes and with chars unescaped) ++static SBuf ++parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0) + { +- checkpoint(); +- +- if (!skip('"')) +- return prefix(returnedToken, CharacterSet::TCHAR); +- +- return qdText(returnedToken, http1p0); +-} +- +-bool +-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0) +-{ +- // the initial DQUOTE has been skipped by the caller +- + /* + * RFC 1945 - defines qdtext: + * inclusive of LWS (which includes CR and LF) +@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0) + // best we can do is a conditional reference since http1p0 value may change per-client + const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1); + +- for (;;) { +- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars); +- returnedToken.append(consume(prefixLen)); ++ SBuf parsedToken; ++ ++ while (!tok.atEnd()) { ++ SBuf qdText; ++ if (tok.prefix(qdText, tokenChars)) ++ parsedToken.append(qdText); ++ ++ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not ++ if (tok.atEnd()) ++ break; + +- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not +- if (!http1p0 && skip('\\')) { + /* RFC 7230 section 3.2.6 + * + * The backslash octet ("\") can be used as a single-octet quoting +@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0) + */ + static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT; + SBuf escaped; +- if (!prefix(escaped, qPairChars, 1)) { +- returnedToken.clear(); +- restoreLastCheckpoint(); +- return false; +- } +- returnedToken.append(escaped); ++ if (!tok.prefix(escaped, qPairChars, 1)) ++ throw TexcHere("invalid escaped character in quoted-pair"); ++ ++ parsedToken.append(escaped); + continue; ++ } + +- } else if (skip('"')) { +- break; // done ++ if (tok.skip('"')) ++ return parsedToken; // may be empty + +- } else if (atEnd()) { +- // need more data +- returnedToken.clear(); +- restoreLastCheckpoint(); +- return false; +- } ++ if (tok.atEnd()) ++ break; + +- // else, we have an error +- debugs(24, 8, "invalid bytes for set " << tokenChars.name); +- returnedToken.clear(); +- restoreLastCheckpoint(); +- return false; ++ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name)); + } + +- // found the whole string +- return true; ++ throw Http::One::InsufficientInput(); ++} ++ ++SBuf ++Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0) ++{ ++ if (tok.skip('"')) ++ return parseQuotedStringSuffix(tok, http1p0); ++ ++ if (tok.atEnd()) ++ throw InsufficientInput(); ++ ++ SBuf parsedToken; ++ if (!tok.prefix(parsedToken, CharacterSet::TCHAR)) ++ throw TexcHere("invalid input while expecting an HTTP token"); ++ ++ if (tok.atEnd()) ++ throw InsufficientInput(); ++ ++ // got the complete token ++ return parsedToken; + } + +diff --git a/src/http/one/Tokenizer.h b/src/http/one/Tokenizer.h +index 658875f..2d40574 100644 +--- a/src/http/one/Tokenizer.h ++++ b/src/http/one/Tokenizer.h +@@ -9,68 +9,47 @@ + #ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H + #define SQUID_SRC_HTTP_ONE_TOKENIZER_H + +-#include "parser/Tokenizer.h" ++#include "parser/forward.h" ++#include "sbuf/forward.h" + + namespace Http { + namespace One { + + /** +- * Lexical processor extended 
to tokenize HTTP/1.x syntax. ++ * Extracts either an HTTP/1 token or quoted-string while dealing with ++ * possibly incomplete input typical for incremental text parsers. ++ * Unescapes escaped characters in HTTP/1.1 quoted strings. + * +- * \see ::Parser::Tokenizer for more detail ++ * \param http1p0 whether to prohibit \-escaped characters in quoted strings ++ * \throws InsufficientInput as appropriate, including on unterminated tokens ++ * \returns extracted token or quoted string (without quotes) ++ * ++ * Governed by: ++ * - RFC 1945 section 2.1 ++ * " ++ * A string of text is parsed as a single word if it is quoted using ++ * double-quote marks. ++ * ++ * quoted-string = ( <"> *(qdtext) <"> ) ++ * ++ * qdtext = and CTLs, ++ * but including LWS> ++ * ++ * Single-character quoting using the backslash ("\") character is not ++ * permitted in HTTP/1.0. ++ * " ++ * ++ * - RFC 7230 section 3.2.6 ++ * " ++ * A string of text is parsed as a single value if it is quoted using ++ * double-quote marks. ++ * ++ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE ++ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text ++ * obs-text = %x80-FF ++ * " + */ +-class Tokenizer : public ::Parser::Tokenizer +-{ +-public: +- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {} +- +- /** +- * Attempt to parse a quoted-string lexical construct. +- * +- * Governed by: +- * - RFC 1945 section 2.1 +- * " +- * A string of text is parsed as a single word if it is quoted using +- * double-quote marks. +- * +- * quoted-string = ( <"> *(qdtext) <"> ) +- * +- * qdtext = and CTLs, +- * but including LWS> +- * +- * Single-character quoting using the backslash ("\") character is not +- * permitted in HTTP/1.0. +- * " +- * +- * - RFC 7230 section 3.2.6 +- * " +- * A string of text is parsed as a single value if it is quoted using +- * double-quote marks. +- * +- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE +- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text +- * obs-text = %x80-FF +- * " +- * +- * \param escaped HTTP/1.0 does not permit \-escaped characters +- */ +- bool quotedString(SBuf &value, const bool http1p0 = false); +- +- /** +- * Attempt to parse a (token / quoted-string ) lexical construct. 
+- */ +- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false); +- +-private: +- /// parse the internal component of a quote-string, and terminal DQUOTE +- bool qdText(SBuf &value, const bool http1p0); +- +- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); } +- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); } +- +- SBuf savedCheckpoint_; +- SBuf::size_type savedStats_; +-}; ++SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false); + + } // namespace One + } // namespace Http +diff --git a/src/http/one/forward.h b/src/http/one/forward.h +index c90dc34..2b4ad28 100644 +--- a/src/http/one/forward.h ++++ b/src/http/one/forward.h +@@ -10,6 +10,7 @@ + #define SQUID_SRC_HTTP_ONE_FORWARD_H + + #include "base/RefCount.h" ++#include "parser/forward.h" + #include "sbuf/forward.h" + + namespace Http { +@@ -31,6 +32,8 @@ typedef RefCount ResponseParserPointer; + /// CRLF textual representation + const SBuf &CrLf(); + ++using ::Parser::InsufficientInput; ++ + } // namespace One + } // namespace Http + +diff --git a/src/parser/BinaryTokenizer.h b/src/parser/BinaryTokenizer.h +index acebd4d..24042d4 100644 +--- a/src/parser/BinaryTokenizer.h ++++ b/src/parser/BinaryTokenizer.h +@@ -9,6 +9,7 @@ + #ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H + #define SQUID_SRC_PARSER_BINARYTOKENIZER_H + ++#include "parser/forward.h" + #include "sbuf/SBuf.h" + + namespace Parser +@@ -44,7 +45,7 @@ public: + class BinaryTokenizer + { + public: +- class InsufficientInput {}; // thrown when a method runs out of data ++ typedef ::Parser::InsufficientInput InsufficientInput; + typedef uint64_t size_type; // enough for the largest supported offset + + BinaryTokenizer(); +diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am +index af2b759..0daa5a8 100644 +--- a/src/parser/Makefile.am ++++ b/src/parser/Makefile.am +@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la + libparser_la_SOURCES = \ + BinaryTokenizer.h \ + BinaryTokenizer.cc \ ++ forward.h \ + Tokenizer.h \ + Tokenizer.cc + +diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc +index 7e73e04..68f4aec 100644 +--- a/src/parser/Tokenizer.cc ++++ b/src/parser/Tokenizer.cc +@@ -10,7 +10,9 @@ + + #include "squid.h" + #include "Debug.h" ++#include "parser/forward.h" + #include "parser/Tokenizer.h" ++#include "sbuf/Stream.h" + + #include + #if HAVE_CTYPE_H +@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c + return true; + } + ++SBuf ++Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit) ++{ ++ if (atEnd()) ++ throw InsufficientInput(); ++ ++ SBuf result; ++ ++ if (!prefix(result, tokenChars, limit)) ++ throw TexcHere(ToSBuf("cannot parse ", description)); ++ ++ if (atEnd()) ++ throw InsufficientInput(); ++ ++ return result; ++} ++ + bool + Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit) + { +@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf: + return success(s - range.rawContent()); + } + ++int64_t ++Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit) ++{ ++ if (atEnd()) ++ throw InsufficientInput(); ++ ++ int64_t result = 0; ++ ++ // Since we only support unsigned decimals, a parsing failure with a ++ // non-empty input always implies invalid/malformed input (or a buggy ++ // limit=0 caller). 
TODO: Support signed and non-decimal integers by ++ // refactoring int64() to detect insufficient input. ++ if (!int64(result, 10, false, limit)) ++ throw TexcHere(ToSBuf("cannot parse ", description)); ++ ++ if (atEnd()) ++ throw InsufficientInput(); // more digits may be coming ++ ++ return result; ++} ++ +diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h +index 54414be..03a8388 100644 +--- a/src/parser/Tokenizer.h ++++ b/src/parser/Tokenizer.h +@@ -143,6 +143,19 @@ public: + */ + bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos); + ++ /* ++ * The methods below mimic their counterparts documented above, but they ++ * throw on errors, including InsufficientInput. The field description ++ * parameter is used for error reporting and debugging. ++ */ ++ ++ /// prefix() wrapper but throws InsufficientInput if input contains ++ /// nothing but the prefix (i.e. if the prefix is not "terminated") ++ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos); ++ ++ /// int64() wrapper but limited to unsigned decimal integers (for now) ++ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos); ++ + protected: + SBuf consume(const SBuf::size_type n); + SBuf::size_type success(const SBuf::size_type n); +diff --git a/src/parser/forward.h b/src/parser/forward.h +new file mode 100644 +index 0000000..5a95b7a +--- /dev/null ++++ b/src/parser/forward.h +@@ -0,0 +1,22 @@ ++/* ++ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors ++ * ++ * Squid software is distributed under GPLv2+ license and includes ++ * contributions from numerous individuals and organizations. ++ * Please see the COPYING and CONTRIBUTORS files for details. ++ */ ++ ++#ifndef SQUID_PARSER_FORWARD_H ++#define SQUID_PARSER_FORWARD_H ++ ++namespace Parser { ++class Tokenizer; ++class BinaryTokenizer; ++ ++// TODO: Move this declaration (to parser/Elements.h) if we need more like it. 
++/// thrown by modern "incremental" parsers when they need more data ++class InsufficientInput {}; ++} // namespace Parser ++ ++#endif /* SQUID_PARSER_FORWARD_H */ ++ +-- +2.39.3 + diff --git a/SOURCES/squid-4.15-CVE-2023-46846.patch b/SOURCES/squid-4.15-CVE-2023-46846.patch new file mode 100644 index 0000000..ff7a0ec --- /dev/null +++ b/SOURCES/squid-4.15-CVE-2023-46846.patch @@ -0,0 +1,179 @@ +From 1b17fc9ee99b822dcca6149d1b30e3aaf7d661cf Mon Sep 17 00:00:00 2001 +From: Amos Jeffries +Date: Fri, 13 Oct 2023 08:44:16 +0000 +Subject: [PATCH 2/2] RFC 9112: Improve HTTP chunked encoding compliance + (#1498) + +--- + src/http/one/Parser.cc | 8 +------- + src/http/one/Parser.h | 4 +--- + src/http/one/TeChunkedParser.cc | 23 ++++++++++++++++++----- + src/parser/Tokenizer.cc | 12 ++++++++++++ + src/parser/Tokenizer.h | 7 +++++++ + 5 files changed, 39 insertions(+), 15 deletions(-) + +diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc +index affe0b1..05591fe 100644 +--- a/src/http/one/Parser.cc ++++ b/src/http/one/Parser.cc +@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters() + void + Http::One::Parser::skipLineTerminator(Tokenizer &tok) const + { +- if (tok.skip(Http1::CrLf())) +- return; +- + if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) + return; + +- if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r')) +- throw InsufficientInput(); +- +- throw TexcHere("garbage instead of CRLF line terminator"); ++ tok.skipRequired("line-terminating CRLF", Http1::CrLf()); + } + + /// all characters except the LF line terminator +diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h +index 40e281b..9a2a4ad 100644 +--- a/src/http/one/Parser.h ++++ b/src/http/one/Parser.h +@@ -120,9 +120,7 @@ protected: + * detect and skip the CRLF or (if tolerant) LF line terminator + * consume from the tokenizer. + * +- * \throws exception on bad or InsuffientInput. +- * \retval true only if line terminator found. +- * \retval false incomplete or missing line terminator, need more data. ++ * \throws exception on bad or InsufficientInput + */ + void skipLineTerminator(Tokenizer &) const; + +diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc +index 6d2f8ea..3bff6c7 100644 +--- a/src/http/one/TeChunkedParser.cc ++++ b/src/http/one/TeChunkedParser.cc +@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok) + { + Must(theChunkSize <= 0); // Should(), really + ++ static const SBuf bannedHexPrefixLower("0x"); ++ static const SBuf bannedHexPrefixUpper("0X"); ++ if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper)) ++ throw TextException("chunk starts with 0x", Here()); ++ + int64_t size = -1; + if (tok.int64(size, 16, false) && !tok.atEnd()) { + if (size < 0) +@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok) + // bad or insufficient input, like in the code below. TODO: Expand up. + try { + parseChunkExtensions(tok); // a possibly empty chunk-ext list +- skipLineTerminator(tok); ++ tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf()); + buf_ = tok.remaining(); + parsingStage_ = theChunkSize ? 
Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME; + return true; +@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok) + // other exceptions bubble up to kill message parsing + } + +-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667): ++/// Parses the chunk-ext list (RFC 9112 section 7.1.1: + /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) + void +-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok) ++Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok) + { + do { ++ auto tok = callerTok; ++ + ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size + + if (!tok.skip(';')) +@@ -145,6 +152,7 @@ Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok) + + parseOneChunkExtension(tok); + buf_ = tok.remaining(); // got one extension ++ callerTok = tok; + } while (true); + } + +@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName + /// Parses a single chunk-ext list element: + /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] ) + void +-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok) ++Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok) + { ++ auto tok = callerTok; ++ + ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name + + const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR); ++ callerTok = tok; // in case we determine that this is a valueless chunk-ext + + ParseBws(tok); + +@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok) + customExtensionValueParser->parse(tok, extName); + else + ChunkExtensionValueParser::Ignore(tok, extName); ++ ++ callerTok = tok; + } + + bool +@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok) + Must(theLeftBodySize == 0); // Should(), really + + try { +- skipLineTerminator(tok); ++ tok.skipRequired("chunk CRLF", Http1::CrLf()); + buf_ = tok.remaining(); // parse checkpoint + theChunkSize = 0; // done with the current chunk + parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ; +diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc +index 68f4aec..8516869 100644 +--- a/src/parser/Tokenizer.cc ++++ b/src/parser/Tokenizer.cc +@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars) + return success(prefixLen); + } + ++void ++Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip) ++{ ++ if (skip(tokenToSkip) || tokenToSkip.isEmpty()) ++ return; ++ ++ if (tokenToSkip.startsWith(buf_)) ++ throw InsufficientInput(); ++ ++ throw TextException(ToSBuf("cannot skip ", description), Here()); ++} ++ + bool + Parser::Tokenizer::skipOne(const CharacterSet &chars) + { +diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h +index 03a8388..78ab9e7 100644 +--- a/src/parser/Tokenizer.h ++++ b/src/parser/Tokenizer.h +@@ -115,6 +115,13 @@ public: + */ + SBuf::size_type skipAll(const CharacterSet &discardables); + ++ /** skips a given character sequence (string); ++ * does nothing if the sequence is empty ++ * ++ * \throws exception on mismatching prefix or InsufficientInput ++ */ ++ void skipRequired(const char *description, const SBuf &tokenToSkip); ++ + /** Removes a single trailing character from the set. 
+ * + * \return whether a character was removed +-- +2.39.3 + diff --git a/SOURCES/squid-4.15-CVE-2023-46847.patch b/SOURCES/squid-4.15-CVE-2023-46847.patch new file mode 100644 index 0000000..98b5611 --- /dev/null +++ b/SOURCES/squid-4.15-CVE-2023-46847.patch @@ -0,0 +1,39 @@ +From dc0e10bec3334053c1a5297e50dd7052ea18aef0 Mon Sep 17 00:00:00 2001 +From: Alex Bason +Date: Sun, 15 Oct 2023 13:04:47 +0000 +Subject: [PATCH] Fix stack buffer overflow when parsing Digest Authorization + (#1517) + +The bug was discovered and detailed by Joshua Rogers at +https://megamansec.github.io/Squid-Security-Audit/digest-overflow.html +where it was filed as "Stack Buffer Overflow in Digest Authentication". +--- + src/auth/digest/Config.cc | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/src/auth/digest/Config.cc b/src/auth/digest/Config.cc +index f00e2ba68..3c070d242 100644 +--- a/src/auth/digest/Config.cc ++++ b/src/auth/digest/Config.cc +@@ -827,11 +827,15 @@ Auth::Digest::Config::decode(char const *proxy_auth, const HttpRequest *request, + break; + + case DIGEST_NC: +- if (value.size() != 8) { ++ if (value.size() == 8) { ++ // for historical reasons, the nc value MUST be exactly 8 bytes ++ static_assert(sizeof(digest_request->nc) == 8 + 1); ++ xstrncpy(digest_request->nc, value.rawBuf(), value.size() + 1); ++ debugs(29, 9, "Found noncecount '" << digest_request->nc << "'"); ++ } else { + debugs(29, 9, "Invalid nc '" << value << "' in '" << temp << "'"); ++ digest_request->nc[0] = 0; + } +- xstrncpy(digest_request->nc, value.rawBuf(), value.size() + 1); +- debugs(29, 9, "Found noncecount '" << digest_request->nc << "'"); + break; + + case DIGEST_CNONCE: +-- +2.25.1 + diff --git a/SPECS/squid.spec b/SPECS/squid.spec index 617f96a..6768c5f 100644 --- a/SPECS/squid.spec +++ b/SPECS/squid.spec @@ -2,7 +2,7 @@ Name: squid Version: 4.15 -Release: 6%{?dist} +Release: 6%{?dist}.1.alma.1 Summary: The Squid proxy caching server Epoch: 7 # See CREDITS for breakdown of non GPLv2+ code @@ -38,6 +38,7 @@ Patch206: squid-4.11-active-ftp.patch Patch208: squid-4.11-convert-ipv4.patch # https://bugzilla.redhat.com/show_bug.cgi?id=2006121 Patch209: squid-4.15-ftp-filename-extraction.patch +Patch210: 0001-Fix-incremental-parsing-of-chunked-quoted-extensions.patch # Security fixes # https://bugzilla.redhat.com/show_bug.cgi?id=1941506 @@ -46,6 +47,10 @@ Patch300: squid-4.15-CVE-2021-28116.patch Patch301: squid-4.15-CVE-2021-46784.patch # https://bugzilla.redhat.com/show_bug.cgi?id=2129771 Patch302: squid-4.15-CVE-2022-41318.patch +# OL squid-4.15-6.0.1.module+el8.8.0+90059+985ac402 +Patch303: squid-4.15-CVE-2023-46846.patch +# OL squid-4.15-6.0.1.module+el8.8.0+90059+985ac402 +Patch304: squid-4.15-CVE-2023-46847.patch Requires: bash >= 2.0 Requires(pre): shadow-utils @@ -107,11 +112,14 @@ lookup program (dnsserver), a program for retrieving FTP data %patch206 -p1 -b .active-ftp %patch208 -p1 -b .convert-ipv4 %patch209 -p1 -b .ftp-fn-extraction +%patch210 -p1 -b .fix-incremental-parsing # Security patches %patch300 -p1 -b .CVE-2021-28116 %patch301 -p1 -b .CVE-2021-46784 %patch302 -p1 -b .CVE-2022-41318 +%patch303 -p1 -b .CVE-2023-46846 +%patch304 -p1 -b .CVE-2023-46847 # https://bugzilla.redhat.com/show_bug.cgi?id=1679526 # Patch in the vendor documentation and used different location for documentation @@ -328,6 +336,10 @@ fi %changelog +* Thu Nov 09 2023 Andrew Lukoshko - 4.15-6.1 +- CVE-2023-46846 RFC 9112: Improve HTTP chunked encoding compliance +- CVE-2023-46847 Fix stack buffer overflow 
when parsing Digest Authorization
+
 * Thu Dec 08 2022 Tomas Korbar <tkorbar@redhat.com> - 4.15-6
 - Resolves: #2072988 - [RFE] Add the "IP_BIND_ADDRESS_NO_PORT" flag to sockets created for outgoing connections in the squid source code.
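
The backtrack-and-retry pattern that the first patch introduces (tokenizer helpers throw Parser::InsufficientInput when the input may still grow; the caller catches it, rewinds to the last commit point, and reports "need more data") can be summarized in a minimal standalone C++ sketch. This sketch is not part of the packaged patches, and every identifier in it is invented for illustration:

    #include <cctype>
    #include <iostream>
    #include <stdexcept>
    #include <string>

    // "need more data" marker, mirroring Parser::InsufficientInput above
    struct InsufficientInput {};

    // Extracts a run of alphanumeric characters (a simplification of HTTP
    // tchar). Throws InsufficientInput when the run reaches the end of the
    // buffer, because the next network read may extend the token.
    static std::string parseToken(std::string &in)
    {
        std::string::size_type n = 0;
        while (n < in.size() && std::isalnum(static_cast<unsigned char>(in[n])))
            ++n;
        if (n == in.size())
            throw InsufficientInput(); // token not terminated yet
        if (n == 0)
            throw std::runtime_error("expected a token");
        const std::string result = in.substr(0, n);
        in.erase(0, n);
        return result;
    }

    // Parses one ";token" extension. Returns false (leaving buf intact) when
    // more data is needed; commits the consumed bytes on success. Malformed
    // input escapes as std::runtime_error and kills the whole parse.
    static bool parseExtension(std::string &buf, std::string &name)
    {
        std::string tok = buf; // parse a working copy, like Tokenizer over buf_
        try {
            if (tok.empty())
                throw InsufficientInput();
            if (tok[0] != ';')
                throw std::runtime_error("expected ';'");
            tok.erase(0, 1);
            name = parseToken(tok);
            buf = tok; // commit point: keep only the unparsed leftovers
            return true;
        } catch (const InsufficientInput &) {
            return false; // backtrack: buf still holds all unparsed input
        }
    }

    int main()
    {
        std::string buf = ";ie";
        std::string name;
        std::cout << parseExtension(buf, name) << "\n"; // 0: "ie" may still grow
        buf += "of;";                                   // more bytes arrive
        std::cout << parseExtension(buf, name) << "\n"; // 1: ';' ended the token
        std::cout << name << " leftover: " << buf << "\n"; // ieof leftover: ;
    }

As in TeChunkedParser::parseChunkMetadataSuffix() above, progress is committed (buf_ = tok.remaining()) only after a complete element parses, so a partially received extension simply makes the parser return false until more bytes arrive, while genuinely malformed input still aborts message parsing.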