squid/squid-4.15-CVE-2023-46846.patch
Luboš Uhliarik 42ed4e34d5 Resolves: RHEL-18351 - squid:4/squid: Buffer over-read in the HTTP Message
processing feature (CVE-2023-49285)
Resolves: RHEL-18342 - squid:4/squid: Incorrect Check of Function Return
  Value In Helper Process management (CVE-2023-49286)
Resolves: RHEL-18230 - squid:4/squid: Denial of Service in SSL Certificate
  validation (CVE-2023-46724)
Resolves: RHEL-15911 - squid:4/squid: NULL pointer dereference in the gopher
  protocol code (CVE-2023-46728)
Resolves: RHEL-18251 - squid crashes in assertion when a parent peer exists
Resolves: RHEL-14794 - squid: squid multiple issues in HTTP response caching
  (CVE-2023-5824)
Resolves: RHEL-14803 - squid: squid: Denial of Service in HTTP Digest
  Authentication (CVE-2023-46847)
Resolves: RHEL-14777 - squid: squid: Request/Response smuggling in HTTP/1.1
  and ICAP (CVE-2023-46846)
2024-02-02 05:07:35 +01:00

1282 lines
44 KiB
Diff

diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc
index 2db0a68..370f077 100644
--- a/src/adaptation/icap/ModXact.cc
+++ b/src/adaptation/icap/ModXact.cc
@@ -25,12 +25,13 @@
#include "comm.h"
#include "comm/Connection.h"
#include "err_detail_type.h"
-#include "http/one/TeChunkedParser.h"
#include "HttpHeaderTools.h"
#include "HttpMsg.h"
#include "HttpReply.h"
#include "HttpRequest.h"
#include "MasterXaction.h"
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
#include "SquidTime.h"
// flow and terminology:
@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXactLauncher);
static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
+const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
+
Adaptation::Icap::ModXact::State::State()
{
memset(this, 0, sizeof(*this));
@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOnParsingBody()
state.parsing = State::psBody;
replyHttpBodySize = 0;
bodyParser = new Http1::TeChunkedParser;
+ bodyParser->parseExtensionValuesWith(&extensionParser);
makeAdaptedBodyPipe("adapted response from the ICAP server");
Must(state.sending == State::sendingAdapted);
} else {
@@ -1142,9 +1146,8 @@ void Adaptation::Icap::ModXact::parseBody()
}
if (parsed) {
- if (state.readyForUob && bodyParser->useOriginBody >= 0) {
- prepPartialBodyEchoing(
- static_cast<uint64_t>(bodyParser->useOriginBody));
+ if (state.readyForUob && extensionParser.sawUseOriginalBody()) {
+ prepPartialBodyEchoing(extensionParser.useOriginalBody());
stopParsing();
return;
}
@@ -2014,3 +2017,14 @@ void Adaptation::Icap::ModXactLauncher::updateHistory(bool doStart)
}
}
+void
+Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
+{
+ if (extName == UseOriginalBodyName) {
+ useOriginalBody_ = tok.udec64("use-original-body");
+ assert(useOriginalBody_ >= 0);
+ } else {
+ Ignore(tok, extName);
+ }
+}
+
diff --git a/src/adaptation/icap/ModXact.h b/src/adaptation/icap/ModXact.h
index f7afa69..fb4dec0 100644
--- a/src/adaptation/icap/ModXact.h
+++ b/src/adaptation/icap/ModXact.h
@@ -15,6 +15,7 @@
#include "adaptation/icap/Xaction.h"
#include "BodyPipe.h"
#include "http/one/forward.h"
+#include "http/one/TeChunkedParser.h"
/*
* ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
@@ -105,6 +106,23 @@ private:
enum State { stDisabled, stWriting, stIeof, stDone } theState;
};
+/// handles ICAP-specific chunk extensions supported by Squid
+class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
+{
+public:
+ /* Http1::ChunkExtensionValueParser API */
+ virtual void parse(Tokenizer &tok, const SBuf &extName) override;
+
+ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
+ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
+
+private:
+ static const SBuf UseOriginalBodyName;
+
+ /// the value of the parsed use-original-body chunk extension (or -1)
+ int64_t useOriginalBody_ = -1;
+};
+
class ModXact: public Xaction, public BodyProducer, public BodyConsumer
{
CBDATA_CLASS(ModXact);
@@ -270,6 +288,8 @@ private:
int adaptHistoryId; ///< adaptation history slot reservation
+ ChunkExtensionValueParser extensionParser;
+
class State
{
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index 0c86733..affe0b1 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -7,10 +7,11 @@
*/
#include "squid.h"
+#include "base/CharacterSet.h"
#include "Debug.h"
#include "http/one/Parser.h"
-#include "http/one/Tokenizer.h"
#include "mime_header.h"
+#include "parser/Tokenizer.h"
#include "SquidConfig.h"
/// RFC 7230 section 2.6 - 7 magic octets
@@ -61,20 +62,19 @@ Http::One::Parser::DelimiterCharacters()
RelaxedDelimiterCharacters() : CharacterSet::SP;
}
-bool
-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
+void
+Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
{
if (tok.skip(Http1::CrLf()))
- return true;
+ return;
if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
- return true;
+ return;
if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
- return false; // need more data
+ throw InsufficientInput();
throw TexcHere("garbage instead of CRLF line terminator");
- return false; // unreachable, but make naive compilers happy
}
/// all characters except the LF line terminator
@@ -102,7 +102,7 @@ LineCharacters()
void
Http::One::Parser::cleanMimePrefix()
{
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
while (tok.skipOne(RelaxedDelimiterCharacters())) {
(void)tok.skipAll(LineCharacters()); // optional line content
// LF terminator is required.
@@ -137,7 +137,7 @@ Http::One::Parser::cleanMimePrefix()
void
Http::One::Parser::unfoldMime()
{
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
const auto szLimit = mimeHeaderBlock_.length();
mimeHeaderBlock_.clear();
// prevent the mime sender being able to make append() realloc/grow multiple times.
@@ -228,7 +228,7 @@ Http::One::Parser::getHostHeaderField()
debugs(25, 5, "looking for " << name);
// while we can find more LF in the SBuf
- Http1::Tokenizer tok(mimeHeaderBlock_);
+ Tokenizer tok(mimeHeaderBlock_);
SBuf p;
while (tok.prefix(p, LineCharacters())) {
@@ -250,7 +250,7 @@ Http::One::Parser::getHostHeaderField()
p.consume(namelen + 1);
// TODO: optimize SBuf::trim to take CharacterSet directly
- Http1::Tokenizer t(p);
+ Tokenizer t(p);
t.skipAll(CharacterSet::WSP);
p = t.remaining();
@@ -278,10 +278,15 @@ Http::One::ErrorLevel()
}
// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
-bool
-Http::One::ParseBws(Tokenizer &tok)
+void
+Http::One::ParseBws(Parser::Tokenizer &tok)
{
- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
+ const auto count = tok.skipAll(Parser::WhitespaceCharacters());
+
+ if (tok.atEnd())
+ throw InsufficientInput(); // even if count is positive
+
+ if (count) {
// Generating BWS is a MUST-level violation so warn about it as needed.
debugs(33, ErrorLevel(), "found " << count << " BWS octets");
// RFC 7230 says we MUST parse BWS, so we fall through even if
@@ -289,6 +294,6 @@ Http::One::ParseBws(Tokenizer &tok)
}
// else we successfully "parsed" an empty BWS sequence
- return true;
+ // success: no more BWS characters expected
}
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index 58a5cae..40e281b 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -12,6 +12,7 @@
#include "anyp/ProtocolVersion.h"
#include "http/one/forward.h"
#include "http/StatusCode.h"
+#include "parser/forward.h"
#include "sbuf/SBuf.h"
namespace Http {
@@ -40,6 +41,7 @@ class Parser : public RefCountable
{
public:
typedef SBuf::size_type size_type;
+ typedef ::Parser::Tokenizer Tokenizer;
Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {}
virtual ~Parser() {}
@@ -118,11 +120,11 @@ protected:
* detect and skip the CRLF or (if tolerant) LF line terminator
* consume from the tokenizer.
*
- * throws if non-terminator is detected.
+ * \throws exception on bad or InsuffientInput.
* \retval true only if line terminator found.
* \retval false incomplete or missing line terminator, need more data.
*/
- bool skipLineTerminator(Http1::Tokenizer &tok) const;
+ void skipLineTerminator(Tokenizer &) const;
/**
* Scan to find the mime headers block for current message.
@@ -159,8 +161,8 @@ private:
};
/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
-/// \returns true (always; unlike all the skip*() functions)
-bool ParseBws(Tokenizer &tok);
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
+void ParseBws(Parser::Tokenizer &);
/// the right debugs() level for logging HTTP violation messages
int ErrorLevel();
diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc
index a325f7d..0f13c92 100644
--- a/src/http/one/RequestParser.cc
+++ b/src/http/one/RequestParser.cc
@@ -9,8 +9,8 @@
#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
-#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLines()
* RFC 7230 section 2.6, 3.1 and 3.5
*/
bool
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseMethodField(Tokenizer &tok)
{
// method field is a sequence of TCHAR.
// Limit to 32 characters to prevent overly long sequences of non-HTTP
@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetCharacters()
}
bool
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseUriField(Tokenizer &tok)
{
/* Arbitrary 64KB URI upper length limit.
*
@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
}
bool
-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
+Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
{
static const SBuf http1p0("HTTP/1.0");
static const SBuf http1p1("HTTP/1.1");
@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
/// Parse CRs at the end of request-line, just before the terminating LF.
bool
-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
{
if (Config.onoff.relaxed_header_parser) {
(void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFirstLine()
// Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
// Now, the request line has to end at the first LF.
static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
- ::Parser::Tokenizer lineTok(buf_);
+ Tokenizer lineTok(buf_);
if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
if (buf_.length() >= Config.maxRequestHeaderSize) {
/* who should we blame for our failure to parse this line? */
- Http1::Tokenizer methodTok(buf_);
+ Tokenizer methodTok(buf_);
if (!parseMethodField(methodTok))
return -1; // blame a bad method (or its delimiter)
@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFirstLine()
return 0;
}
- Http1::Tokenizer tok(line);
+ Tokenizer tok(line);
if (!parseMethodField(tok))
return -1;
diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h
index 7086548..26697cd 100644
--- a/src/http/one/RequestParser.h
+++ b/src/http/one/RequestParser.h
@@ -54,11 +54,11 @@ private:
bool doParse(const SBuf &aBuf);
/* all these return false and set parseStatusCode on parsing failures */
- bool parseMethodField(Http1::Tokenizer &);
- bool parseUriField(Http1::Tokenizer &);
- bool parseHttpVersionField(Http1::Tokenizer &);
+ bool parseMethodField(Tokenizer &);
+ bool parseUriField(Tokenizer &);
+ bool parseHttpVersionField(Tokenizer &);
bool skipDelimiter(const size_t count, const char *where);
- bool skipTrailingCrs(Http1::Tokenizer &tok);
+ bool skipTrailingCrs(Tokenizer &tok);
bool http0() const {return !msgProtocol_.major;}
static const CharacterSet &RequestTargetCharacters();
diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc
index 24af849..65baf09 100644
--- a/src/http/one/ResponseParser.cc
+++ b/src/http/one/ResponseParser.cc
@@ -9,8 +9,8 @@
#include "squid.h"
#include "Debug.h"
#include "http/one/ResponseParser.h"
-#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
+#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize() const
// NP: we found the protocol version and consumed it already.
// just need the status code and reason phrase
int
-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
{
if (!completedStatus_) {
debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c
static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
(void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
try {
- if (skipLineTerminator(tok)) {
- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
- buf_ = tok.remaining(); // resume checkpoint
- return 1;
- }
+ skipLineTerminator(tok);
+ buf_ = tok.remaining(); // resume checkpoint
+ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
+ return 1;
+ } catch (const InsufficientInput &) {
reasonPhrase_.clear();
return 0; // need more to be sure we have it all
-
} catch (const std::exception &ex) {
debugs(74, 6, "invalid status-line: " << ex.what());
}
@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c
int
Http::One::ResponseParser::parseResponseFirstLine()
{
- Http1::Tokenizer tok(buf_);
+ Tokenizer tok(buf_);
const CharacterSet &WspDelim = DelimiterCharacters();
diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h
index 15db4a0..cf13b4d 100644
--- a/src/http/one/ResponseParser.h
+++ b/src/http/one/ResponseParser.h
@@ -43,7 +43,7 @@ public:
private:
int parseResponseFirstLine();
- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
+ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
/// magic prefix for identifying ICY response messages
static const SBuf IcyMagic;
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
index 754086e..6d2f8ea 100644
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -13,10 +13,13 @@
#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
#include "MemBuf.h"
+#include "parser/Tokenizer.h"
#include "Parsing.h"
+#include "sbuf/Stream.h"
#include "SquidConfig.h"
-Http::One::TeChunkedParser::TeChunkedParser()
+Http::One::TeChunkedParser::TeChunkedParser():
+ customExtensionValueParser(nullptr)
{
// chunked encoding only exists in HTTP/1.1
Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear()
buf_.clear();
theChunkSize = theLeftBodySize = 0;
theOut = NULL;
- useOriginBody = -1;
+ // XXX: We do not reset customExtensionValueParser here. Based on the
+ // clear() API description, we must, but it makes little sense and could
+ // break method callers if they appear because some of them may forget to
+ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
+ // parent class and this unnecessary method with it.
}
bool
@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const SBuf &aBuf)
if (parsingStage_ == Http1::HTTP_PARSE_NONE)
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
- Http1::Tokenizer tok(buf_);
+ Tokenizer tok(buf_);
// loop for as many chunks as we can
// use do-while instead of while so that we can incrementally
// restart in the middle of a chunk/frame
do {
- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
return false;
if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
@@ -80,7 +87,7 @@ Http::One::TeChunkedParser::needsMoreSpace() const
/// RFC 7230 section 4.1 chunk-size
bool
-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
{
Must(theChunkSize <= 0); // Should(), really
@@ -104,66 +111,75 @@ Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
return false; // should not be reachable
}
-/**
- * Parses chunk metadata suffix, looking for interesting extensions and/or
- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
- *
- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
- * chunk-ext-name = token
- * chunk-ext-val = token / quoted-string
- *
- * ICAP 'use-original-body=N' extension is supported.
- */
+/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
+/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
+/// last-chunk = 1*"0" [ chunk-ext ] CRLF
bool
-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
+Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
{
- SBuf ext;
- SBuf value;
- while (
- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
- tok.skip(';') &&
- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
-
- // whole value part is optional. if no '=' expect next chunk-ext
- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
-
- if (!skipKnown) {
- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
- buf_ = tok.remaining(); // parse checkpoint
- continue;
- }
- }
-
- debugs(94, 5, "skipping unknown chunk extension " << ext);
-
- // unknown might have a value token or quoted-string
- if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
- buf_ = tok.remaining(); // parse checkpoint
- continue;
- }
-
- // otherwise need more data OR corrupt syntax
- break;
- }
-
- if (!tok.atEnd())
- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
- }
-
- if (skipLineTerminator(tok)) {
- buf_ = tok.remaining(); // checkpoint
- // non-0 chunk means data, 0-size means optional Trailer follows
+ // Code becomes much simpler when incremental parsing functions throw on
+ // bad or insufficient input, like in the code below. TODO: Expand up.
+ try {
+ parseChunkExtensions(tok); // a possibly empty chunk-ext list
+ skipLineTerminator(tok);
+ buf_ = tok.remaining();
parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
return true;
+ } catch (const InsufficientInput &) {
+ tok.reset(buf_); // backtrack to the last commit point
+ return false;
}
+ // other exceptions bubble up to kill message parsing
+}
- return false;
+/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+{
+ do {
+ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+
+ if (!tok.skip(';'))
+ return; // reached the end of extensions (if any)
+
+ parseOneChunkExtension(tok);
+ buf_ = tok.remaining(); // got one extension
+ } while (true);
+}
+
+void
+Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
+{
+ const auto ignoredValue = tokenOrQuotedString(tok);
+ debugs(94, 5, extName << " with value " << ignoredValue);
+}
+
+/// Parses a single chunk-ext list element:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+void
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+{
+ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
+
+ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+
+ ParseBws(tok);
+
+ if (!tok.skip('='))
+ return; // parsed a valueless chunk-ext
+
+ ParseBws(tok);
+
+ // optimization: the only currently supported extension needs last-chunk
+ if (!theChunkSize && customExtensionValueParser)
+ customExtensionValueParser->parse(tok, extName);
+ else
+ ChunkExtensionValueParser::Ignore(tok, extName);
}
bool
-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
{
if (theLeftBodySize > 0) {
buf_ = tok.remaining(); // sync buffers before buf_ use
@@ -188,17 +204,20 @@ Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
}
bool
-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
{
Must(theLeftBodySize == 0); // Should(), really
- if (skipLineTerminator(tok)) {
+ try {
+ skipLineTerminator(tok);
buf_ = tok.remaining(); // parse checkpoint
theChunkSize = 0; // done with the current chunk
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
return true;
}
-
- return false;
+ catch (const InsufficientInput &) {
+ return false;
+ }
+ // other exceptions bubble up to kill message parsing
}
diff --git a/src/http/one/TeChunkedParser.h b/src/http/one/TeChunkedParser.h
index 1b0319e..2ca8988 100644
--- a/src/http/one/TeChunkedParser.h
+++ b/src/http/one/TeChunkedParser.h
@@ -18,6 +18,26 @@ namespace Http
namespace One
{
+using ::Parser::InsufficientInput;
+
+// TODO: Move this class into http/one/ChunkExtensionValueParser.*
+/// A customizable parser of a single chunk extension value (chunk-ext-val).
+/// From RFC 7230 section 4.1.1 and its Errata #4667:
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
+/// chunk-ext-name = token
+/// chunk-ext-val = token / quoted-string
+class ChunkExtensionValueParser
+{
+public:
+ typedef ::Parser::Tokenizer Tokenizer;
+
+ /// extracts and ignores the value of a named extension
+ static void Ignore(Tokenizer &tok, const SBuf &extName);
+
+ /// extracts and then interprets (or ignores) the extension value
+ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
+};
+
/**
* An incremental parser for chunked transfer coding
* defined in RFC 7230 section 4.1.
@@ -25,7 +45,7 @@ namespace One
*
* The parser shovels content bytes from the raw
* input buffer into the content output buffer, both caller-supplied.
- * Ignores chunk extensions except for ICAP's ieof.
+ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
* Trailers are available via mimeHeader() if wanted.
*/
class TeChunkedParser : public Http1::Parser
@@ -37,6 +57,10 @@ public:
/// set the buffer to be used to store decoded chunk data
void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
+ /// Instead of ignoring all chunk extension values, give the supplied
+ /// parser a chance to handle them. Only applied to last-chunk (for now).
+ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
+
bool needsMoreSpace() const;
/* Http1::Parser API */
@@ -45,17 +69,20 @@ public:
virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
private:
- bool parseChunkSize(Http1::Tokenizer &tok);
- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
- bool parseChunkBody(Http1::Tokenizer &tok);
- bool parseChunkEnd(Http1::Tokenizer &tok);
+ bool parseChunkSize(Tokenizer &tok);
+ bool parseChunkMetadataSuffix(Tokenizer &);
+ void parseChunkExtensions(Tokenizer &);
+ void parseOneChunkExtension(Tokenizer &);
+ bool parseChunkBody(Tokenizer &tok);
+ bool parseChunkEnd(Tokenizer &tok);
MemBuf *theOut;
uint64_t theChunkSize;
uint64_t theLeftBodySize;
-public:
- int64_t useOriginBody;
+ /// An optional plugin for parsing and interpreting custom chunk-ext-val.
+ /// This "visitor" object is owned by our creator.
+ ChunkExtensionValueParser *customExtensionValueParser;
};
} // namespace One
diff --git a/src/http/one/Tokenizer.cc b/src/http/one/Tokenizer.cc
index 804b8e1..3a6bef3 100644
--- a/src/http/one/Tokenizer.cc
+++ b/src/http/one/Tokenizer.cc
@@ -8,35 +8,18 @@
#include "squid.h"
#include "Debug.h"
+#include "http/one/Parser.h"
#include "http/one/Tokenizer.h"
-
-bool
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
-{
- checkpoint();
-
- if (!skip('"'))
- return false;
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
+#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
+
+/// Extracts quoted-string after the caller removes the initial '"'.
+/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
+/// \throws InsufficientInput when input can be a token _prefix_
+/// \returns extracted quoted string (without quotes and with chars unescaped)
+static SBuf
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
{
- checkpoint();
-
- if (!skip('"'))
- return prefix(returnedToken, CharacterSet::TCHAR);
-
- return qdText(returnedToken, http1p0);
-}
-
-bool
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
-{
- // the initial DQUOTE has been skipped by the caller
-
/*
* RFC 1945 - defines qdtext:
* inclusive of LWS (which includes CR and LF)
@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
// best we can do is a conditional reference since http1p0 value may change per-client
const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
- for (;;) {
- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
- returnedToken.append(consume(prefixLen));
+ SBuf parsedToken;
+
+ while (!tok.atEnd()) {
+ SBuf qdText;
+ if (tok.prefix(qdText, tokenChars))
+ parsedToken.append(qdText);
+
+ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
+ if (tok.atEnd())
+ break;
- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
- if (!http1p0 && skip('\\')) {
/* RFC 7230 section 3.2.6
*
* The backslash octet ("\") can be used as a single-octet quoting
@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
*/
static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
SBuf escaped;
- if (!prefix(escaped, qPairChars, 1)) {
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
- returnedToken.append(escaped);
+ if (!tok.prefix(escaped, qPairChars, 1))
+ throw TexcHere("invalid escaped character in quoted-pair");
+
+ parsedToken.append(escaped);
continue;
+ }
- } else if (skip('"')) {
- break; // done
+ if (tok.skip('"'))
+ return parsedToken; // may be empty
- } else if (atEnd()) {
- // need more data
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
- }
+ if (tok.atEnd())
+ break;
- // else, we have an error
- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
- returnedToken.clear();
- restoreLastCheckpoint();
- return false;
+ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
}
- // found the whole string
- return true;
+ throw Http::One::InsufficientInput();
+}
+
+SBuf
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
+{
+ if (tok.skip('"'))
+ return parseQuotedStringSuffix(tok, http1p0);
+
+ if (tok.atEnd())
+ throw InsufficientInput();
+
+ SBuf parsedToken;
+ if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
+ throw TexcHere("invalid input while expecting an HTTP token");
+
+ if (tok.atEnd())
+ throw InsufficientInput();
+
+ // got the complete token
+ return parsedToken;
}
diff --git a/src/http/one/Tokenizer.h b/src/http/one/Tokenizer.h
index 658875f..2d40574 100644
--- a/src/http/one/Tokenizer.h
+++ b/src/http/one/Tokenizer.h
@@ -9,68 +9,47 @@
#ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
#define SQUID_SRC_HTTP_ONE_TOKENIZER_H
-#include "parser/Tokenizer.h"
+#include "parser/forward.h"
+#include "sbuf/forward.h"
namespace Http {
namespace One {
/**
- * Lexical processor extended to tokenize HTTP/1.x syntax.
+ * Extracts either an HTTP/1 token or quoted-string while dealing with
+ * possibly incomplete input typical for incremental text parsers.
+ * Unescapes escaped characters in HTTP/1.1 quoted strings.
*
- * \see ::Parser::Tokenizer for more detail
+ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
+ * \throws InsufficientInput as appropriate, including on unterminated tokens
+ * \returns extracted token or quoted string (without quotes)
+ *
+ * Governed by:
+ * - RFC 1945 section 2.1
+ * "
+ * A string of text is parsed as a single word if it is quoted using
+ * double-quote marks.
+ *
+ * quoted-string = ( <"> *(qdtext) <"> )
+ *
+ * qdtext = <any CHAR except <"> and CTLs,
+ * but including LWS>
+ *
+ * Single-character quoting using the backslash ("\") character is not
+ * permitted in HTTP/1.0.
+ * "
+ *
+ * - RFC 7230 section 3.2.6
+ * "
+ * A string of text is parsed as a single value if it is quoted using
+ * double-quote marks.
+ *
+ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
+ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
+ * obs-text = %x80-FF
+ * "
*/
-class Tokenizer : public ::Parser::Tokenizer
-{
-public:
- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
-
- /**
- * Attempt to parse a quoted-string lexical construct.
- *
- * Governed by:
- * - RFC 1945 section 2.1
- * "
- * A string of text is parsed as a single word if it is quoted using
- * double-quote marks.
- *
- * quoted-string = ( <"> *(qdtext) <"> )
- *
- * qdtext = <any CHAR except <"> and CTLs,
- * but including LWS>
- *
- * Single-character quoting using the backslash ("\") character is not
- * permitted in HTTP/1.0.
- * "
- *
- * - RFC 7230 section 3.2.6
- * "
- * A string of text is parsed as a single value if it is quoted using
- * double-quote marks.
- *
- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
- * obs-text = %x80-FF
- * "
- *
- * \param escaped HTTP/1.0 does not permit \-escaped characters
- */
- bool quotedString(SBuf &value, const bool http1p0 = false);
-
- /**
- * Attempt to parse a (token / quoted-string ) lexical construct.
- */
- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
-
-private:
- /// parse the internal component of a quote-string, and terminal DQUOTE
- bool qdText(SBuf &value, const bool http1p0);
-
- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
-
- SBuf savedCheckpoint_;
- SBuf::size_type savedStats_;
-};
+SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
} // namespace One
} // namespace Http
diff --git a/src/http/one/forward.h b/src/http/one/forward.h
index c90dc34..2b4ad28 100644
--- a/src/http/one/forward.h
+++ b/src/http/one/forward.h
@@ -10,6 +10,7 @@
#define SQUID_SRC_HTTP_ONE_FORWARD_H
#include "base/RefCount.h"
+#include "parser/forward.h"
#include "sbuf/forward.h"
namespace Http {
@@ -31,6 +32,8 @@ typedef RefCount<Http::One::ResponseParser> ResponseParserPointer;
/// CRLF textual representation
const SBuf &CrLf();
+using ::Parser::InsufficientInput;
+
} // namespace One
} // namespace Http
diff --git a/src/parser/BinaryTokenizer.h b/src/parser/BinaryTokenizer.h
index acebd4d..24042d4 100644
--- a/src/parser/BinaryTokenizer.h
+++ b/src/parser/BinaryTokenizer.h
@@ -9,6 +9,7 @@
#ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
#define SQUID_SRC_PARSER_BINARYTOKENIZER_H
+#include "parser/forward.h"
#include "sbuf/SBuf.h"
namespace Parser
@@ -44,7 +45,7 @@ public:
class BinaryTokenizer
{
public:
- class InsufficientInput {}; // thrown when a method runs out of data
+ typedef ::Parser::InsufficientInput InsufficientInput;
typedef uint64_t size_type; // enough for the largest supported offset
BinaryTokenizer();
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
index af2b759..0daa5a8 100644
--- a/src/parser/Makefile.am
+++ b/src/parser/Makefile.am
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
libparser_la_SOURCES = \
BinaryTokenizer.h \
BinaryTokenizer.cc \
+ forward.h \
Tokenizer.h \
Tokenizer.cc
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 7e73e04..68f4aec 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -10,7 +10,9 @@
#include "squid.h"
#include "Debug.h"
+#include "parser/forward.h"
#include "parser/Tokenizer.h"
+#include "sbuf/Stream.h"
#include <cerrno>
#if HAVE_CTYPE_H
@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c
return true;
}
+SBuf
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
+{
+ if (atEnd())
+ throw InsufficientInput();
+
+ SBuf result;
+
+ if (!prefix(result, tokenChars, limit))
+ throw TexcHere(ToSBuf("cannot parse ", description));
+
+ if (atEnd())
+ throw InsufficientInput();
+
+ return result;
+}
+
bool
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
{
@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf:
return success(s - range.rawContent());
}
+int64_t
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
+{
+ if (atEnd())
+ throw InsufficientInput();
+
+ int64_t result = 0;
+
+ // Since we only support unsigned decimals, a parsing failure with a
+ // non-empty input always implies invalid/malformed input (or a buggy
+ // limit=0 caller). TODO: Support signed and non-decimal integers by
+ // refactoring int64() to detect insufficient input.
+ if (!int64(result, 10, false, limit))
+ throw TexcHere(ToSBuf("cannot parse ", description));
+
+ if (atEnd())
+ throw InsufficientInput(); // more digits may be coming
+
+ return result;
+}
+
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index 54414be..03a8388 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -143,6 +143,19 @@ public:
*/
bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
+ /*
+ * The methods below mimic their counterparts documented above, but they
+ * throw on errors, including InsufficientInput. The field description
+ * parameter is used for error reporting and debugging.
+ */
+
+ /// prefix() wrapper but throws InsufficientInput if input contains
+ /// nothing but the prefix (i.e. if the prefix is not "terminated")
+ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
+
+ /// int64() wrapper but limited to unsigned decimal integers (for now)
+ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
+
protected:
SBuf consume(const SBuf::size_type n);
SBuf::size_type success(const SBuf::size_type n);
diff --git a/src/parser/forward.h b/src/parser/forward.h
new file mode 100644
index 0000000..5a95b7a
--- /dev/null
+++ b/src/parser/forward.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_PARSER_FORWARD_H
+#define SQUID_PARSER_FORWARD_H
+
+namespace Parser {
+class Tokenizer;
+class BinaryTokenizer;
+
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
+/// thrown by modern "incremental" parsers when they need more data
+class InsufficientInput {};
+} // namespace Parser
+
+#endif /* SQUID_PARSER_FORWARD_H */
+
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index affe0b1..05591fe 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
void
Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
{
- if (tok.skip(Http1::CrLf()))
- return;
-
if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
return;
- if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
- throw InsufficientInput();
-
- throw TexcHere("garbage instead of CRLF line terminator");
+ tok.skipRequired("line-terminating CRLF", Http1::CrLf());
}
/// all characters except the LF line terminator
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index 40e281b..9a2a4ad 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -120,9 +120,7 @@ protected:
* detect and skip the CRLF or (if tolerant) LF line terminator
* consume from the tokenizer.
*
- * \throws exception on bad or InsuffientInput.
- * \retval true only if line terminator found.
- * \retval false incomplete or missing line terminator, need more data.
+ * \throws exception on bad or InsufficientInput
*/
void skipLineTerminator(Tokenizer &) const;
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
index 6d2f8ea..3bff6c7 100644
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
{
Must(theChunkSize <= 0); // Should(), really
+ static const SBuf bannedHexPrefixLower("0x");
+ static const SBuf bannedHexPrefixUpper("0X");
+ if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+ throw TextException("chunk starts with 0x", Here());
+
int64_t size = -1;
if (tok.int64(size, 16, false) && !tok.atEnd()) {
if (size < 0)
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
// bad or insufficient input, like in the code below. TODO: Expand up.
try {
parseChunkExtensions(tok); // a possibly empty chunk-ext list
- skipLineTerminator(tok);
+ tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
buf_ = tok.remaining();
parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
return true;
@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
// other exceptions bubble up to kill message parsing
}
-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1:
/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
void
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
{
do {
+ auto tok = callerTok;
+
ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
if (!tok.skip(';'))
@@ -145,6 +152,7 @@ Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
parseOneChunkExtension(tok);
buf_ = tok.remaining(); // got one extension
+ callerTok = tok;
} while (true);
}
@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
/// Parses a single chunk-ext list element:
/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
void
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
{
+ auto tok = callerTok;
+
ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+ callerTok = tok; // in case we determine that this is a valueless chunk-ext
ParseBws(tok);
@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
customExtensionValueParser->parse(tok, extName);
else
ChunkExtensionValueParser::Ignore(tok, extName);
+
+ callerTok = tok;
}
bool
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
Must(theLeftBodySize == 0); // Should(), really
try {
- skipLineTerminator(tok);
+ tok.skipRequired("chunk CRLF", Http1::CrLf());
buf_ = tok.remaining(); // parse checkpoint
theChunkSize = 0; // done with the current chunk
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
index 68f4aec..8516869 100644
--- a/src/parser/Tokenizer.cc
+++ b/src/parser/Tokenizer.cc
@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
return success(prefixLen);
}
+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+ if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+ return;
+
+ if (tokenToSkip.startsWith(buf_))
+ throw InsufficientInput();
+
+ throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
bool
Parser::Tokenizer::skipOne(const CharacterSet &chars)
{
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
index 03a8388..78ab9e7 100644
--- a/src/parser/Tokenizer.h
+++ b/src/parser/Tokenizer.h
@@ -115,6 +115,13 @@ public:
*/
SBuf::size_type skipAll(const CharacterSet &discardables);
+ /** skips a given character sequence (string);
+ * does nothing if the sequence is empty
+ *
+ * \throws exception on mismatching prefix or InsufficientInput
+ */
+ void skipRequired(const char *description, const SBuf &tokenToSkip);
+
/** Removes a single trailing character from the set.
*
* \return whether a character was removed