From 05f6af2f4c85cc99323cfff6149c3d74af661b6d Mon Sep 17 00:00:00 2001
From: Amos Jeffries
Date: Fri, 13 Oct 2023 08:44:16 +0000
Subject: [PATCH] RFC 9112: Improve HTTP chunked encoding compliance (#1498)

---
 src/http/one/Parser.cc          |  8 +-------
 src/http/one/Parser.h           |  4 +---
 src/http/one/TeChunkedParser.cc | 23 ++++++++++++++++++-----
 src/parser/Tokenizer.cc         | 12 ++++++++++++
 src/parser/Tokenizer.h          |  7 +++++++
 5 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index c78ddd7f0..291ae39f0 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
 void
 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
-    if (tok.skip(Http1::CrLf()))
-        return;
-
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
         return;
 
-    if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
-        throw InsufficientInput();
-
-    throw TexcHere("garbage instead of CRLF line terminator");
+    tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }
 
 /// all characters except the LF line terminator
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index f83c01a9a..aab895583 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -124,9 +124,7 @@ protected:
      * detect and skip the CRLF or (if tolerant) LF line terminator
      * consume from the tokenizer.
      *
-     * \throws exception on bad or InsuffientInput.
-     * \retval true only if line terminator found.
-     * \retval false incomplete or missing line terminator, need more data.
+     * \throws exception on bad or InsufficientInput
      */
     void skipLineTerminator(Tokenizer &) const;
 
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
index 1434100b6..8bdb65abb 100644
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really
 
+    static const SBuf bannedHexPrefixLower("0x");
+    static const SBuf bannedHexPrefixUpper("0X");
+    if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+        throw TextException("chunk starts with 0x", Here());
+
     int64_t size = -1;
     if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // bad or insufficient input, like in the code below. TODO: Expand up.
     try {
         parseChunkExtensions(tok); // a possibly empty chunk-ext list
-        skipLineTerminator(tok);
+        tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
         buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
         return true;
@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // other exceptions bubble up to kill message parsing
 }
 
-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1):
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
 {
     do {
+        auto tok = callerTok;
+
         ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
 
         if (!tok.skip(';'))
@@ -145,6 +152,7 @@
 
         parseOneChunkExtension(tok);
         buf_ = tok.remaining(); // got one extension
+        callerTok = tok;
     } while (true);
 }
 
@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
 /// Parses a single chunk-ext list element:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
 {
+    auto tok = callerTok;
+
     ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
 
     const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+    callerTok = tok; // in case we determine that this is a valueless chunk-ext
 
     ParseBws(tok);
 
@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
         customExtensionValueParser->parse(tok, extName);
     else
         ChunkExtensionValueParser::Ignore(tok, extName);
+
+    callerTok = tok;
 }
 
 bool
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
     Must(theLeftBodySize == 0); // Should(), really
 
     try {
-        skipLineTerminator(tok);
+        tok.skipRequired("chunk CRLF", Http1::CrLf());
         buf_ = tok.remaining(); // parse checkpoint
         theChunkSize = 0; // done with the current chunk
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index edaffd8d3..15df793b8 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
     return success(prefixLen);
 }
 
+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+    if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+        return;
+
+    if (tokenToSkip.startsWith(buf_))
+        throw InsufficientInput();
+
+    throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index 7bae1ccbb..3cfa7dd6c 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -115,6 +115,13 @@ public:
      */
     SBuf::size_type skipAll(const CharacterSet &discardables);
 
+    /** skips a given character sequence (string);
+     * does nothing if the sequence is empty
+     *
+     * \throws exception on mismatching prefix or InsufficientInput
+     */
+    void skipRequired(const char *description, const SBuf &tokenToSkip);
+
     /** Removes a single trailing character from the set.
      *
      * \return whether a character was removed
-- 
2.25.1
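
For illustration only: the sketch below is not Squid code. It is a minimal standalone C++17 program, with a hypothetical skipRequired() free function and std::string_view standing in for Squid's SBuf/Tokenizer and TextException machinery, showing the three outcomes the new Parser::Tokenizer::skipRequired() is meant to distinguish when this patch requires a CRLF after chunk-size [chunk-ext] and after chunk-data.

// Standalone sketch (C++17), assuming skipRequired-like semantics:
// consume an exact required token, report "need more data" while the
// buffered bytes are still a prefix of that token, and fail hard on
// any other mismatch. Names here are illustrative, not Squid's.
#include <cassert>
#include <stdexcept>
#include <string>
#include <string_view>

// Thrown when the buffered bytes may still grow into the required token.
struct InsufficientInput : std::runtime_error {
    InsufficientInput() : std::runtime_error("need more data") {}
};

// Consumes tokenToSkip at the start of buf and returns the remaining input.
// Throws InsufficientInput when buf is a proper prefix of tokenToSkip;
// throws std::runtime_error on any other mismatch.
static std::string_view
skipRequired(const char *description, std::string_view buf, std::string_view tokenToSkip)
{
    if (buf.substr(0, tokenToSkip.size()) == tokenToSkip)
        return buf.substr(tokenToSkip.size()); // token present: consume it
    if (tokenToSkip.substr(0, buf.size()) == buf)
        throw InsufficientInput(); // e.g. a lone CR at the end of the buffer
    throw std::runtime_error(std::string("cannot skip ") + description);
}

int main()
{
    // complete chunk terminator: consumed, parsing continues
    assert(skipRequired("chunk CRLF", "\r\nNEXT", "\r\n") == "NEXT");

    // lone CR at the end of the buffer: caller must wait for more bytes
    try {
        skipRequired("chunk CRLF", "\r", "\r\n");
        assert(false);
    } catch (const InsufficientInput &) {
    }

    // bare LF (or any other garbage) where CRLF is required: hard error
    try {
        skipRequired("chunk CRLF", "\nNEXT", "\r\n");
        assert(false);
    } catch (const InsufficientInput &) {
        assert(false); // must not be mistaken for incomplete input
    } catch (const std::runtime_error &) {
    }

    return 0;
}

The point mirrored here is that only input that is still a prefix of the required token counts as incomplete; a bare LF (or other garbage) where a CRLF is required now fails the chunked parse instead of being accepted, while the relaxed_header_parser LF tolerance remains limited to header line terminators in Parser::skipLineTerminator().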