1282 lines
44 KiB
Diff
1282 lines
44 KiB
Diff
|
diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc
|
||
|
index 2db0a68..370f077 100644
|
||
|
--- a/src/adaptation/icap/ModXact.cc
|
||
|
+++ b/src/adaptation/icap/ModXact.cc
|
||
|
@@ -25,12 +25,13 @@
|
||
|
#include "comm.h"
|
||
|
#include "comm/Connection.h"
|
||
|
#include "err_detail_type.h"
|
||
|
-#include "http/one/TeChunkedParser.h"
|
||
|
#include "HttpHeaderTools.h"
|
||
|
#include "HttpMsg.h"
|
||
|
#include "HttpReply.h"
|
||
|
#include "HttpRequest.h"
|
||
|
#include "MasterXaction.h"
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
+#include "sbuf/Stream.h"
|
||
|
#include "SquidTime.h"
|
||
|
|
||
|
// flow and terminology:
|
||
|
@@ -44,6 +45,8 @@ CBDATA_NAMESPACED_CLASS_INIT(Adaptation::Icap, ModXactLauncher);
|
||
|
|
||
|
static const size_t TheBackupLimit = BodyPipe::MaxCapacity;
|
||
|
|
||
|
+const SBuf Adaptation::Icap::ChunkExtensionValueParser::UseOriginalBodyName("use-original-body");
|
||
|
+
|
||
|
Adaptation::Icap::ModXact::State::State()
|
||
|
{
|
||
|
memset(this, 0, sizeof(*this));
|
||
|
@@ -1108,6 +1111,7 @@ void Adaptation::Icap::ModXact::decideOnParsingBody()
|
||
|
state.parsing = State::psBody;
|
||
|
replyHttpBodySize = 0;
|
||
|
bodyParser = new Http1::TeChunkedParser;
|
||
|
+ bodyParser->parseExtensionValuesWith(&extensionParser);
|
||
|
makeAdaptedBodyPipe("adapted response from the ICAP server");
|
||
|
Must(state.sending == State::sendingAdapted);
|
||
|
} else {
|
||
|
@@ -1142,9 +1146,8 @@ void Adaptation::Icap::ModXact::parseBody()
|
||
|
}
|
||
|
|
||
|
if (parsed) {
|
||
|
- if (state.readyForUob && bodyParser->useOriginBody >= 0) {
|
||
|
- prepPartialBodyEchoing(
|
||
|
- static_cast<uint64_t>(bodyParser->useOriginBody));
|
||
|
+ if (state.readyForUob && extensionParser.sawUseOriginalBody()) {
|
||
|
+ prepPartialBodyEchoing(extensionParser.useOriginalBody());
|
||
|
stopParsing();
|
||
|
return;
|
||
|
}
|
||
|
@@ -2014,3 +2017,14 @@ void Adaptation::Icap::ModXactLauncher::updateHistory(bool doStart)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+void
|
||
|
+Adaptation::Icap::ChunkExtensionValueParser::parse(Tokenizer &tok, const SBuf &extName)
|
||
|
+{
|
||
|
+ if (extName == UseOriginalBodyName) {
|
||
|
+ useOriginalBody_ = tok.udec64("use-original-body");
|
||
|
+ assert(useOriginalBody_ >= 0);
|
||
|
+ } else {
|
||
|
+ Ignore(tok, extName);
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
diff --git a/src/adaptation/icap/ModXact.h b/src/adaptation/icap/ModXact.h
|
||
|
index f7afa69..fb4dec0 100644
|
||
|
--- a/src/adaptation/icap/ModXact.h
|
||
|
+++ b/src/adaptation/icap/ModXact.h
|
||
|
@@ -15,6 +15,7 @@
|
||
|
#include "adaptation/icap/Xaction.h"
|
||
|
#include "BodyPipe.h"
|
||
|
#include "http/one/forward.h"
|
||
|
+#include "http/one/TeChunkedParser.h"
|
||
|
|
||
|
/*
|
||
|
* ICAPModXact implements ICAP REQMOD and RESPMOD transaction using
|
||
|
@@ -105,6 +106,23 @@ private:
|
||
|
enum State { stDisabled, stWriting, stIeof, stDone } theState;
|
||
|
};
|
||
|
|
||
|
+/// handles ICAP-specific chunk extensions supported by Squid
|
||
|
+class ChunkExtensionValueParser: public Http1::ChunkExtensionValueParser
|
||
|
+{
|
||
|
+public:
|
||
|
+ /* Http1::ChunkExtensionValueParser API */
|
||
|
+ virtual void parse(Tokenizer &tok, const SBuf &extName) override;
|
||
|
+
|
||
|
+ bool sawUseOriginalBody() const { return useOriginalBody_ >= 0; }
|
||
|
+ uint64_t useOriginalBody() const { assert(sawUseOriginalBody()); return static_cast<uint64_t>(useOriginalBody_); }
|
||
|
+
|
||
|
+private:
|
||
|
+ static const SBuf UseOriginalBodyName;
|
||
|
+
|
||
|
+ /// the value of the parsed use-original-body chunk extension (or -1)
|
||
|
+ int64_t useOriginalBody_ = -1;
|
||
|
+};
|
||
|
+
|
||
|
class ModXact: public Xaction, public BodyProducer, public BodyConsumer
|
||
|
{
|
||
|
CBDATA_CLASS(ModXact);
|
||
|
@@ -270,6 +288,8 @@ private:
|
||
|
|
||
|
int adaptHistoryId; ///< adaptation history slot reservation
|
||
|
|
||
|
+ ChunkExtensionValueParser extensionParser;
|
||
|
+
|
||
|
class State
|
||
|
{
|
||
|
|
||
|
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
|
||
|
index 0c86733..affe0b1 100644
|
||
|
--- a/src/http/one/Parser.cc
|
||
|
+++ b/src/http/one/Parser.cc
|
||
|
@@ -7,10 +7,11 @@
|
||
|
*/
|
||
|
|
||
|
#include "squid.h"
|
||
|
+#include "base/CharacterSet.h"
|
||
|
#include "Debug.h"
|
||
|
#include "http/one/Parser.h"
|
||
|
-#include "http/one/Tokenizer.h"
|
||
|
#include "mime_header.h"
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
#include "SquidConfig.h"
|
||
|
|
||
|
/// RFC 7230 section 2.6 - 7 magic octets
|
||
|
@@ -61,20 +62,19 @@ Http::One::Parser::DelimiterCharacters()
|
||
|
RelaxedDelimiterCharacters() : CharacterSet::SP;
|
||
|
}
|
||
|
|
||
|
-bool
|
||
|
-Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
|
||
|
+void
|
||
|
+Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
|
||
|
{
|
||
|
if (tok.skip(Http1::CrLf()))
|
||
|
- return true;
|
||
|
+ return;
|
||
|
|
||
|
if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
|
||
|
- return true;
|
||
|
+ return;
|
||
|
|
||
|
if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
|
||
|
- return false; // need more data
|
||
|
+ throw InsufficientInput();
|
||
|
|
||
|
throw TexcHere("garbage instead of CRLF line terminator");
|
||
|
- return false; // unreachable, but make naive compilers happy
|
||
|
}
|
||
|
|
||
|
/// all characters except the LF line terminator
|
||
|
@@ -102,7 +102,7 @@ LineCharacters()
|
||
|
void
|
||
|
Http::One::Parser::cleanMimePrefix()
|
||
|
{
|
||
|
- Http1::Tokenizer tok(mimeHeaderBlock_);
|
||
|
+ Tokenizer tok(mimeHeaderBlock_);
|
||
|
while (tok.skipOne(RelaxedDelimiterCharacters())) {
|
||
|
(void)tok.skipAll(LineCharacters()); // optional line content
|
||
|
// LF terminator is required.
|
||
|
@@ -137,7 +137,7 @@ Http::One::Parser::cleanMimePrefix()
|
||
|
void
|
||
|
Http::One::Parser::unfoldMime()
|
||
|
{
|
||
|
- Http1::Tokenizer tok(mimeHeaderBlock_);
|
||
|
+ Tokenizer tok(mimeHeaderBlock_);
|
||
|
const auto szLimit = mimeHeaderBlock_.length();
|
||
|
mimeHeaderBlock_.clear();
|
||
|
// prevent the mime sender being able to make append() realloc/grow multiple times.
|
||
|
@@ -228,7 +228,7 @@ Http::One::Parser::getHostHeaderField()
|
||
|
debugs(25, 5, "looking for " << name);
|
||
|
|
||
|
// while we can find more LF in the SBuf
|
||
|
- Http1::Tokenizer tok(mimeHeaderBlock_);
|
||
|
+ Tokenizer tok(mimeHeaderBlock_);
|
||
|
SBuf p;
|
||
|
|
||
|
while (tok.prefix(p, LineCharacters())) {
|
||
|
@@ -250,7 +250,7 @@ Http::One::Parser::getHostHeaderField()
|
||
|
p.consume(namelen + 1);
|
||
|
|
||
|
// TODO: optimize SBuf::trim to take CharacterSet directly
|
||
|
- Http1::Tokenizer t(p);
|
||
|
+ Tokenizer t(p);
|
||
|
t.skipAll(CharacterSet::WSP);
|
||
|
p = t.remaining();
|
||
|
|
||
|
@@ -278,10 +278,15 @@ Http::One::ErrorLevel()
|
||
|
}
|
||
|
|
||
|
// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
|
||
|
-bool
|
||
|
-Http::One::ParseBws(Tokenizer &tok)
|
||
|
+void
|
||
|
+Http::One::ParseBws(Parser::Tokenizer &tok)
|
||
|
{
|
||
|
- if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
|
||
|
+ const auto count = tok.skipAll(Parser::WhitespaceCharacters());
|
||
|
+
|
||
|
+ if (tok.atEnd())
|
||
|
+ throw InsufficientInput(); // even if count is positive
|
||
|
+
|
||
|
+ if (count) {
|
||
|
// Generating BWS is a MUST-level violation so warn about it as needed.
|
||
|
debugs(33, ErrorLevel(), "found " << count << " BWS octets");
|
||
|
// RFC 7230 says we MUST parse BWS, so we fall through even if
|
||
|
@@ -289,6 +294,6 @@ Http::One::ParseBws(Tokenizer &tok)
|
||
|
}
|
||
|
// else we successfully "parsed" an empty BWS sequence
|
||
|
|
||
|
- return true;
|
||
|
+ // success: no more BWS characters expected
|
||
|
}
|
||
|
|
||
|
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
|
||
|
index 58a5cae..40e281b 100644
|
||
|
--- a/src/http/one/Parser.h
|
||
|
+++ b/src/http/one/Parser.h
|
||
|
@@ -12,6 +12,7 @@
|
||
|
#include "anyp/ProtocolVersion.h"
|
||
|
#include "http/one/forward.h"
|
||
|
#include "http/StatusCode.h"
|
||
|
+#include "parser/forward.h"
|
||
|
#include "sbuf/SBuf.h"
|
||
|
|
||
|
namespace Http {
|
||
|
@@ -40,6 +41,7 @@ class Parser : public RefCountable
|
||
|
{
|
||
|
public:
|
||
|
typedef SBuf::size_type size_type;
|
||
|
+ typedef ::Parser::Tokenizer Tokenizer;
|
||
|
|
||
|
Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE), hackExpectsMime_(false) {}
|
||
|
virtual ~Parser() {}
|
||
|
@@ -118,11 +120,11 @@ protected:
|
||
|
* detect and skip the CRLF or (if tolerant) LF line terminator
|
||
|
* consume from the tokenizer.
|
||
|
*
|
||
|
- * throws if non-terminator is detected.
|
||
|
+ * \throws InsufficientInput when more data is needed; other exceptions on bad input.
|
||
|
* \retval true only if line terminator found.
|
||
|
* \retval false incomplete or missing line terminator, need more data.
|
||
|
*/
|
||
|
- bool skipLineTerminator(Http1::Tokenizer &tok) const;
|
||
|
+ void skipLineTerminator(Tokenizer &) const;
|
||
|
|
||
|
/**
|
||
|
* Scan to find the mime headers block for current message.
|
||
|
@@ -159,8 +161,8 @@ private:
|
||
|
};
|
||
|
|
||
|
/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
|
||
|
-/// \returns true (always; unlike all the skip*() functions)
|
||
|
-bool ParseBws(Tokenizer &tok);
|
||
|
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
|
||
|
+void ParseBws(Parser::Tokenizer &);
|
||
|
|
||
|
/// the right debugs() level for logging HTTP violation messages
|
||
|
int ErrorLevel();
|
||
|
diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc
|
||
|
index a325f7d..0f13c92 100644
|
||
|
--- a/src/http/one/RequestParser.cc
|
||
|
+++ b/src/http/one/RequestParser.cc
|
||
|
@@ -9,8 +9,8 @@
|
||
|
#include "squid.h"
|
||
|
#include "Debug.h"
|
||
|
#include "http/one/RequestParser.h"
|
||
|
-#include "http/one/Tokenizer.h"
|
||
|
#include "http/ProtocolVersion.h"
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
#include "profiler/Profiler.h"
|
||
|
#include "SquidConfig.h"
|
||
|
|
||
|
@@ -64,7 +64,7 @@ Http::One::RequestParser::skipGarbageLines()
|
||
|
* RFC 7230 section 2.6, 3.1 and 3.5
|
||
|
*/
|
||
|
bool
|
||
|
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
|
||
|
+Http::One::RequestParser::parseMethodField(Tokenizer &tok)
|
||
|
{
|
||
|
// method field is a sequence of TCHAR.
|
||
|
// Limit to 32 characters to prevent overly long sequences of non-HTTP
|
||
|
@@ -145,7 +145,7 @@ Http::One::RequestParser::RequestTargetCharacters()
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
|
||
|
+Http::One::RequestParser::parseUriField(Tokenizer &tok)
|
||
|
{
|
||
|
/* Arbitrary 64KB URI upper length limit.
|
||
|
*
|
||
|
@@ -178,7 +178,7 @@ Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
-Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
|
||
|
+Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
|
||
|
{
|
||
|
static const SBuf http1p0("HTTP/1.0");
|
||
|
static const SBuf http1p1("HTTP/1.1");
|
||
|
@@ -253,7 +253,7 @@ Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
|
||
|
|
||
|
/// Parse CRs at the end of request-line, just before the terminating LF.
|
||
|
bool
|
||
|
-Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
|
||
|
+Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
|
||
|
{
|
||
|
if (Config.onoff.relaxed_header_parser) {
|
||
|
(void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
|
||
|
@@ -289,12 +289,12 @@ Http::One::RequestParser::parseRequestFirstLine()
|
||
|
// Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
|
||
|
// Now, the request line has to end at the first LF.
|
||
|
static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
|
||
|
- ::Parser::Tokenizer lineTok(buf_);
|
||
|
+ Tokenizer lineTok(buf_);
|
||
|
if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
|
||
|
if (buf_.length() >= Config.maxRequestHeaderSize) {
|
||
|
/* who should we blame for our failure to parse this line? */
|
||
|
|
||
|
- Http1::Tokenizer methodTok(buf_);
|
||
|
+ Tokenizer methodTok(buf_);
|
||
|
if (!parseMethodField(methodTok))
|
||
|
return -1; // blame a bad method (or its delimiter)
|
||
|
|
||
|
@@ -308,7 +308,7 @@ Http::One::RequestParser::parseRequestFirstLine()
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
- Http1::Tokenizer tok(line);
|
||
|
+ Tokenizer tok(line);
|
||
|
|
||
|
if (!parseMethodField(tok))
|
||
|
return -1;
|
||
|
diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h
|
||
|
index 7086548..26697cd 100644
|
||
|
--- a/src/http/one/RequestParser.h
|
||
|
+++ b/src/http/one/RequestParser.h
|
||
|
@@ -54,11 +54,11 @@ private:
|
||
|
bool doParse(const SBuf &aBuf);
|
||
|
|
||
|
/* all these return false and set parseStatusCode on parsing failures */
|
||
|
- bool parseMethodField(Http1::Tokenizer &);
|
||
|
- bool parseUriField(Http1::Tokenizer &);
|
||
|
- bool parseHttpVersionField(Http1::Tokenizer &);
|
||
|
+ bool parseMethodField(Tokenizer &);
|
||
|
+ bool parseUriField(Tokenizer &);
|
||
|
+ bool parseHttpVersionField(Tokenizer &);
|
||
|
bool skipDelimiter(const size_t count, const char *where);
|
||
|
- bool skipTrailingCrs(Http1::Tokenizer &tok);
|
||
|
+ bool skipTrailingCrs(Tokenizer &tok);
|
||
|
|
||
|
bool http0() const {return !msgProtocol_.major;}
|
||
|
static const CharacterSet &RequestTargetCharacters();
|
||
|
diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc
|
||
|
index 24af849..65baf09 100644
|
||
|
--- a/src/http/one/ResponseParser.cc
|
||
|
+++ b/src/http/one/ResponseParser.cc
|
||
|
@@ -9,8 +9,8 @@
|
||
|
#include "squid.h"
|
||
|
#include "Debug.h"
|
||
|
#include "http/one/ResponseParser.h"
|
||
|
-#include "http/one/Tokenizer.h"
|
||
|
#include "http/ProtocolVersion.h"
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
#include "profiler/Profiler.h"
|
||
|
#include "SquidConfig.h"
|
||
|
|
||
|
@@ -47,7 +47,7 @@ Http::One::ResponseParser::firstLineSize() const
|
||
|
// NP: we found the protocol version and consumed it already.
|
||
|
// just need the status code and reason phrase
|
||
|
int
|
||
|
-Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
|
||
|
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim)
|
||
|
{
|
||
|
if (!completedStatus_) {
|
||
|
debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
|
||
|
@@ -87,14 +87,13 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c
|
||
|
static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
|
||
|
(void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
|
||
|
try {
|
||
|
- if (skipLineTerminator(tok)) {
|
||
|
- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
|
||
|
- buf_ = tok.remaining(); // resume checkpoint
|
||
|
- return 1;
|
||
|
- }
|
||
|
+ skipLineTerminator(tok);
|
||
|
+ buf_ = tok.remaining(); // resume checkpoint
|
||
|
+ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
|
||
|
+ return 1;
|
||
|
+ } catch (const InsufficientInput &) {
|
||
|
reasonPhrase_.clear();
|
||
|
return 0; // need more to be sure we have it all
|
||
|
-
|
||
|
} catch (const std::exception &ex) {
|
||
|
debugs(74, 6, "invalid status-line: " << ex.what());
|
||
|
}
|
||
|
@@ -119,7 +118,7 @@ Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, c
|
||
|
int
|
||
|
Http::One::ResponseParser::parseResponseFirstLine()
|
||
|
{
|
||
|
- Http1::Tokenizer tok(buf_);
|
||
|
+ Tokenizer tok(buf_);
|
||
|
|
||
|
const CharacterSet &WspDelim = DelimiterCharacters();
|
||
|
|
||
|
diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h
|
||
|
index 15db4a0..cf13b4d 100644
|
||
|
--- a/src/http/one/ResponseParser.h
|
||
|
+++ b/src/http/one/ResponseParser.h
|
||
|
@@ -43,7 +43,7 @@ public:
|
||
|
|
||
|
private:
|
||
|
int parseResponseFirstLine();
|
||
|
- int parseResponseStatusAndReason(Http1::Tokenizer&, const CharacterSet &);
|
||
|
+ int parseResponseStatusAndReason(Tokenizer&, const CharacterSet &);
|
||
|
|
||
|
/// magic prefix for identifying ICY response messages
|
||
|
static const SBuf IcyMagic;
|
||
|
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
|
||
|
index 754086e..6d2f8ea 100644
|
||
|
--- a/src/http/one/TeChunkedParser.cc
|
||
|
+++ b/src/http/one/TeChunkedParser.cc
|
||
|
@@ -13,10 +13,13 @@
|
||
|
#include "http/one/Tokenizer.h"
|
||
|
#include "http/ProtocolVersion.h"
|
||
|
#include "MemBuf.h"
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
#include "Parsing.h"
|
||
|
+#include "sbuf/Stream.h"
|
||
|
#include "SquidConfig.h"
|
||
|
|
||
|
-Http::One::TeChunkedParser::TeChunkedParser()
|
||
|
+Http::One::TeChunkedParser::TeChunkedParser():
|
||
|
+ customExtensionValueParser(nullptr)
|
||
|
{
|
||
|
// chunked encoding only exists in HTTP/1.1
|
||
|
Http1::Parser::msgProtocol_ = Http::ProtocolVersion(1,1);
|
||
|
@@ -31,7 +34,11 @@ Http::One::TeChunkedParser::clear()
|
||
|
buf_.clear();
|
||
|
theChunkSize = theLeftBodySize = 0;
|
||
|
theOut = NULL;
|
||
|
- useOriginBody = -1;
|
||
|
+ // XXX: We do not reset customExtensionValueParser here. Based on the
|
||
|
+ // clear() API description, we must, but it makes little sense and could
|
||
|
+ // break method callers if they appear because some of them may forget to
|
||
|
+ // reset customExtensionValueParser. TODO: Remove Http1::Parser as our
|
||
|
+ // parent class and this unnecessary method with it.
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
@@ -49,14 +56,14 @@ Http::One::TeChunkedParser::parse(const SBuf &aBuf)
|
||
|
if (parsingStage_ == Http1::HTTP_PARSE_NONE)
|
||
|
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
|
||
|
|
||
|
- Http1::Tokenizer tok(buf_);
|
||
|
+ Tokenizer tok(buf_);
|
||
|
|
||
|
// loop for as many chunks as we can
|
||
|
// use do-while instead of while so that we can incrementally
|
||
|
// restart in the middle of a chunk/frame
|
||
|
do {
|
||
|
|
||
|
- if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkExtension(tok, theChunkSize))
|
||
|
+ if (parsingStage_ == Http1::HTTP_PARSE_CHUNK_EXT && !parseChunkMetadataSuffix(tok))
|
||
|
return false;
|
||
|
|
||
|
if (parsingStage_ == Http1::HTTP_PARSE_CHUNK && !parseChunkBody(tok))
|
||
|
@@ -80,7 +87,7 @@ Http::One::TeChunkedParser::needsMoreSpace() const
|
||
|
|
||
|
/// RFC 7230 section 4.1 chunk-size
|
||
|
bool
|
||
|
-Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
|
||
|
+Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
|
||
|
{
|
||
|
Must(theChunkSize <= 0); // Should(), really
|
||
|
|
||
|
@@ -104,66 +111,75 @@ Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
|
||
|
return false; // should not be reachable
|
||
|
}
|
||
|
|
||
|
-/**
|
||
|
- * Parses chunk metadata suffix, looking for interesting extensions and/or
|
||
|
- * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
|
||
|
- *
|
||
|
- * chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
- * chunk-ext-name = token
|
||
|
- * chunk-ext-val = token / quoted-string
|
||
|
- *
|
||
|
- * ICAP 'use-original-body=N' extension is supported.
|
||
|
- */
|
||
|
+/// Parses "[chunk-ext] CRLF" from RFC 7230 section 4.1.1:
|
||
|
+/// chunk = chunk-size [ chunk-ext ] CRLF chunk-data CRLF
|
||
|
+/// last-chunk = 1*"0" [ chunk-ext ] CRLF
|
||
|
bool
|
||
|
-Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
|
||
|
+Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
|
||
|
{
|
||
|
- SBuf ext;
|
||
|
- SBuf value;
|
||
|
- while (
|
||
|
- ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
|
||
|
- tok.skip(';') &&
|
||
|
- ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
|
||
|
- tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
|
||
|
-
|
||
|
- // whole value part is optional. if no '=' expect next chunk-ext
|
||
|
- if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
|
||
|
-
|
||
|
- if (!skipKnown) {
|
||
|
- if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
|
||
|
- debugs(94, 3, "Found chunk extension " << ext << "=" << useOriginBody);
|
||
|
- buf_ = tok.remaining(); // parse checkpoint
|
||
|
- continue;
|
||
|
- }
|
||
|
- }
|
||
|
-
|
||
|
- debugs(94, 5, "skipping unknown chunk extension " << ext);
|
||
|
-
|
||
|
- // unknown might have a value token or quoted-string
|
||
|
- if (tok.quotedStringOrToken(value) && !tok.atEnd()) {
|
||
|
- buf_ = tok.remaining(); // parse checkpoint
|
||
|
- continue;
|
||
|
- }
|
||
|
-
|
||
|
- // otherwise need more data OR corrupt syntax
|
||
|
- break;
|
||
|
- }
|
||
|
-
|
||
|
- if (!tok.atEnd())
|
||
|
- buf_ = tok.remaining(); // parse checkpoint (unless there might be more token name)
|
||
|
- }
|
||
|
-
|
||
|
- if (skipLineTerminator(tok)) {
|
||
|
- buf_ = tok.remaining(); // checkpoint
|
||
|
- // non-0 chunk means data, 0-size means optional Trailer follows
|
||
|
+ // Code becomes much simpler when incremental parsing functions throw on
|
||
|
+ // bad or insufficient input, like in the code below. TODO: Expand up.
|
||
|
+ try {
|
||
|
+ parseChunkExtensions(tok); // a possibly empty chunk-ext list
|
||
|
+ skipLineTerminator(tok);
|
||
|
+ buf_ = tok.remaining();
|
||
|
parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
|
||
|
return true;
|
||
|
+ } catch (const InsufficientInput &) {
|
||
|
+ tok.reset(buf_); // backtrack to the last commit point
|
||
|
+ return false;
|
||
|
}
|
||
|
+ // other exceptions bubble up to kill message parsing
|
||
|
+}
|
||
|
|
||
|
- return false;
|
||
|
+/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
|
||
|
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
+void
|
||
|
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
|
||
|
+{
|
||
|
+ do {
|
||
|
+ ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
|
||
|
+
|
||
|
+ if (!tok.skip(';'))
|
||
|
+ return; // reached the end of extensions (if any)
|
||
|
+
|
||
|
+ parseOneChunkExtension(tok);
|
||
|
+ buf_ = tok.remaining(); // got one extension
|
||
|
+ } while (true);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName)
|
||
|
+{
|
||
|
+ const auto ignoredValue = tokenOrQuotedString(tok);
|
||
|
+ debugs(94, 5, extName << " with value " << ignoredValue);
|
||
|
+}
|
||
|
+
|
||
|
+/// Parses a single chunk-ext list element:
|
||
|
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
+void
|
||
|
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
|
||
|
+{
|
||
|
+ ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
|
||
|
+
|
||
|
+ const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
|
||
|
+
|
||
|
+ ParseBws(tok);
|
||
|
+
|
||
|
+ if (!tok.skip('='))
|
||
|
+ return; // parsed a valueless chunk-ext
|
||
|
+
|
||
|
+ ParseBws(tok);
|
||
|
+
|
||
|
+ // optimization: the only currently supported extension needs last-chunk
|
||
|
+ if (!theChunkSize && customExtensionValueParser)
|
||
|
+ customExtensionValueParser->parse(tok, extName);
|
||
|
+ else
|
||
|
+ ChunkExtensionValueParser::Ignore(tok, extName);
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
-Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
|
||
|
+Http::One::TeChunkedParser::parseChunkBody(Tokenizer &tok)
|
||
|
{
|
||
|
if (theLeftBodySize > 0) {
|
||
|
buf_ = tok.remaining(); // sync buffers before buf_ use
|
||
|
@@ -188,17 +204,20 @@ Http::One::TeChunkedParser::parseChunkBody(Http1::Tokenizer &tok)
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
-Http::One::TeChunkedParser::parseChunkEnd(Http1::Tokenizer &tok)
|
||
|
+Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
|
||
|
{
|
||
|
Must(theLeftBodySize == 0); // Should(), really
|
||
|
|
||
|
- if (skipLineTerminator(tok)) {
|
||
|
+ try {
|
||
|
+ skipLineTerminator(tok);
|
||
|
buf_ = tok.remaining(); // parse checkpoint
|
||
|
theChunkSize = 0; // done with the current chunk
|
||
|
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
|
||
|
return true;
|
||
|
}
|
||
|
-
|
||
|
- return false;
|
||
|
+ catch (const InsufficientInput &) {
|
||
|
+ return false;
|
||
|
+ }
|
||
|
+ // other exceptions bubble up to kill message parsing
|
||
|
}
|
||
|
|
||
|
diff --git a/src/http/one/TeChunkedParser.h b/src/http/one/TeChunkedParser.h
|
||
|
index 1b0319e..2ca8988 100644
|
||
|
--- a/src/http/one/TeChunkedParser.h
|
||
|
+++ b/src/http/one/TeChunkedParser.h
|
||
|
@@ -18,6 +18,26 @@ namespace Http
|
||
|
namespace One
|
||
|
{
|
||
|
|
||
|
+using ::Parser::InsufficientInput;
|
||
|
+
|
||
|
+// TODO: Move this class into http/one/ChunkExtensionValueParser.*
|
||
|
+/// A customizable parser of a single chunk extension value (chunk-ext-val).
|
||
|
+/// From RFC 7230 section 4.1.1 and its Errata #4667:
|
||
|
+/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
+/// chunk-ext-name = token
|
||
|
+/// chunk-ext-val = token / quoted-string
|
||
|
+class ChunkExtensionValueParser
|
||
|
+{
|
||
|
+public:
|
||
|
+ typedef ::Parser::Tokenizer Tokenizer;
|
||
|
+
|
||
|
+ /// extracts and ignores the value of a named extension
|
||
|
+ static void Ignore(Tokenizer &tok, const SBuf &extName);
|
||
|
+
|
||
|
+ /// extracts and then interprets (or ignores) the extension value
|
||
|
+ virtual void parse(Tokenizer &tok, const SBuf &extName) = 0;
|
||
|
+};
|
||
|
+
|
||
|
/**
|
||
|
* An incremental parser for chunked transfer coding
|
||
|
* defined in RFC 7230 section 4.1.
|
||
|
@@ -25,7 +45,7 @@ namespace One
|
||
|
*
|
||
|
* The parser shovels content bytes from the raw
|
||
|
* input buffer into the content output buffer, both caller-supplied.
|
||
|
- * Ignores chunk extensions except for ICAP's ieof.
|
||
|
+ * Chunk extensions like use-original-body are handled via parseExtensionValuesWith().
|
||
|
* Trailers are available via mimeHeader() if wanted.
|
||
|
*/
|
||
|
class TeChunkedParser : public Http1::Parser
|
||
|
@@ -37,6 +57,10 @@ public:
|
||
|
/// set the buffer to be used to store decoded chunk data
|
||
|
void setPayloadBuffer(MemBuf *parsedContent) {theOut = parsedContent;}
|
||
|
|
||
|
+ /// Instead of ignoring all chunk extension values, give the supplied
|
||
|
+ /// parser a chance to handle them. Only applied to last-chunk (for now).
|
||
|
+ void parseExtensionValuesWith(ChunkExtensionValueParser *parser) { customExtensionValueParser = parser; }
|
||
|
+
|
||
|
bool needsMoreSpace() const;
|
||
|
|
||
|
/* Http1::Parser API */
|
||
|
@@ -45,17 +69,20 @@ public:
|
||
|
virtual Parser::size_type firstLineSize() const {return 0;} // has no meaning with multiple chunks
|
||
|
|
||
|
private:
|
||
|
- bool parseChunkSize(Http1::Tokenizer &tok);
|
||
|
- bool parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown);
|
||
|
- bool parseChunkBody(Http1::Tokenizer &tok);
|
||
|
- bool parseChunkEnd(Http1::Tokenizer &tok);
|
||
|
+ bool parseChunkSize(Tokenizer &tok);
|
||
|
+ bool parseChunkMetadataSuffix(Tokenizer &);
|
||
|
+ void parseChunkExtensions(Tokenizer &);
|
||
|
+ void parseOneChunkExtension(Tokenizer &);
|
||
|
+ bool parseChunkBody(Tokenizer &tok);
|
||
|
+ bool parseChunkEnd(Tokenizer &tok);
|
||
|
|
||
|
MemBuf *theOut;
|
||
|
uint64_t theChunkSize;
|
||
|
uint64_t theLeftBodySize;
|
||
|
|
||
|
-public:
|
||
|
- int64_t useOriginBody;
|
||
|
+ /// An optional plugin for parsing and interpreting custom chunk-ext-val.
|
||
|
+ /// This "visitor" object is owned by our creator.
|
||
|
+ ChunkExtensionValueParser *customExtensionValueParser;
|
||
|
};
|
||
|
|
||
|
} // namespace One
|
||
|
diff --git a/src/http/one/Tokenizer.cc b/src/http/one/Tokenizer.cc
|
||
|
index 804b8e1..3a6bef3 100644
|
||
|
--- a/src/http/one/Tokenizer.cc
|
||
|
+++ b/src/http/one/Tokenizer.cc
|
||
|
@@ -8,35 +8,18 @@
|
||
|
|
||
|
#include "squid.h"
|
||
|
#include "Debug.h"
|
||
|
+#include "http/one/Parser.h"
|
||
|
#include "http/one/Tokenizer.h"
|
||
|
-
|
||
|
-bool
|
||
|
-Http::One::Tokenizer::quotedString(SBuf &returnedToken, const bool http1p0)
|
||
|
-{
|
||
|
- checkpoint();
|
||
|
-
|
||
|
- if (!skip('"'))
|
||
|
- return false;
|
||
|
-
|
||
|
- return qdText(returnedToken, http1p0);
|
||
|
-}
|
||
|
-
|
||
|
-bool
|
||
|
-Http::One::Tokenizer::quotedStringOrToken(SBuf &returnedToken, const bool http1p0)
|
||
|
+#include "parser/Tokenizer.h"
|
||
|
+#include "sbuf/Stream.h"
|
||
|
+
|
||
|
+/// Extracts quoted-string after the caller removes the initial '"'.
|
||
|
+/// \param http1p0 whether to prohibit \-escaped characters in quoted strings
|
||
|
+/// \throws InsufficientInput when input can be a quoted-string _prefix_
|
||
|
+/// \returns extracted quoted string (without quotes and with chars unescaped)
|
||
|
+static SBuf
|
||
|
+parseQuotedStringSuffix(Parser::Tokenizer &tok, const bool http1p0)
|
||
|
{
|
||
|
- checkpoint();
|
||
|
-
|
||
|
- if (!skip('"'))
|
||
|
- return prefix(returnedToken, CharacterSet::TCHAR);
|
||
|
-
|
||
|
- return qdText(returnedToken, http1p0);
|
||
|
-}
|
||
|
-
|
||
|
-bool
|
||
|
-Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
|
||
|
-{
|
||
|
- // the initial DQUOTE has been skipped by the caller
|
||
|
-
|
||
|
/*
|
||
|
* RFC 1945 - defines qdtext:
|
||
|
* inclusive of LWS (which includes CR and LF)
|
||
|
@@ -61,12 +44,17 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
|
||
|
// best we can do is a conditional reference since http1p0 value may change per-client
|
||
|
const CharacterSet &tokenChars = (http1p0 ? qdtext1p0 : qdtext1p1);
|
||
|
|
||
|
- for (;;) {
|
||
|
- SBuf::size_type prefixLen = buf().findFirstNotOf(tokenChars);
|
||
|
- returnedToken.append(consume(prefixLen));
|
||
|
+ SBuf parsedToken;
|
||
|
+
|
||
|
+ while (!tok.atEnd()) {
|
||
|
+ SBuf qdText;
|
||
|
+ if (tok.prefix(qdText, tokenChars))
|
||
|
+ parsedToken.append(qdText);
|
||
|
+
|
||
|
+ if (!http1p0 && tok.skip('\\')) { // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
|
||
|
+ if (tok.atEnd())
|
||
|
+ break;
|
||
|
|
||
|
- // HTTP/1.1 allows quoted-pair, HTTP/1.0 does not
|
||
|
- if (!http1p0 && skip('\\')) {
|
||
|
/* RFC 7230 section 3.2.6
|
||
|
*
|
||
|
* The backslash octet ("\") can be used as a single-octet quoting
|
||
|
@@ -78,32 +66,42 @@ Http::One::Tokenizer::qdText(SBuf &returnedToken, const bool http1p0)
|
||
|
*/
|
||
|
static const CharacterSet qPairChars = CharacterSet::HTAB + CharacterSet::SP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
|
||
|
SBuf escaped;
|
||
|
- if (!prefix(escaped, qPairChars, 1)) {
|
||
|
- returnedToken.clear();
|
||
|
- restoreLastCheckpoint();
|
||
|
- return false;
|
||
|
- }
|
||
|
- returnedToken.append(escaped);
|
||
|
+ if (!tok.prefix(escaped, qPairChars, 1))
|
||
|
+ throw TexcHere("invalid escaped character in quoted-pair");
|
||
|
+
|
||
|
+ parsedToken.append(escaped);
|
||
|
continue;
|
||
|
+ }
|
||
|
|
||
|
- } else if (skip('"')) {
|
||
|
- break; // done
|
||
|
+ if (tok.skip('"'))
|
||
|
+ return parsedToken; // may be empty
|
||
|
|
||
|
- } else if (atEnd()) {
|
||
|
- // need more data
|
||
|
- returnedToken.clear();
|
||
|
- restoreLastCheckpoint();
|
||
|
- return false;
|
||
|
- }
|
||
|
+ if (tok.atEnd())
|
||
|
+ break;
|
||
|
|
||
|
- // else, we have an error
|
||
|
- debugs(24, 8, "invalid bytes for set " << tokenChars.name);
|
||
|
- returnedToken.clear();
|
||
|
- restoreLastCheckpoint();
|
||
|
- return false;
|
||
|
+ throw TexcHere(ToSBuf("invalid bytes for set ", tokenChars.name));
|
||
|
}
|
||
|
|
||
|
- // found the whole string
|
||
|
- return true;
|
||
|
+ throw Http::One::InsufficientInput();
|
||
|
+}
|
||
|
+
|
||
|
+SBuf
|
||
|
+Http::One::tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0)
|
||
|
+{
|
||
|
+ if (tok.skip('"'))
|
||
|
+ return parseQuotedStringSuffix(tok, http1p0);
|
||
|
+
|
||
|
+ if (tok.atEnd())
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ SBuf parsedToken;
|
||
|
+ if (!tok.prefix(parsedToken, CharacterSet::TCHAR))
|
||
|
+ throw TexcHere("invalid input while expecting an HTTP token");
|
||
|
+
|
||
|
+ if (tok.atEnd())
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ // got the complete token
|
||
|
+ return parsedToken;
|
||
|
}
|
||
|
|
||
|
diff --git a/src/http/one/Tokenizer.h b/src/http/one/Tokenizer.h
|
||
|
index 658875f..2d40574 100644
|
||
|
--- a/src/http/one/Tokenizer.h
|
||
|
+++ b/src/http/one/Tokenizer.h
|
||
|
@@ -9,68 +9,47 @@
|
||
|
#ifndef SQUID_SRC_HTTP_ONE_TOKENIZER_H
|
||
|
#define SQUID_SRC_HTTP_ONE_TOKENIZER_H
|
||
|
|
||
|
-#include "parser/Tokenizer.h"
|
||
|
+#include "parser/forward.h"
|
||
|
+#include "sbuf/forward.h"
|
||
|
|
||
|
namespace Http {
|
||
|
namespace One {
|
||
|
|
||
|
/**
|
||
|
- * Lexical processor extended to tokenize HTTP/1.x syntax.
|
||
|
+ * Extracts either an HTTP/1 token or quoted-string while dealing with
|
||
|
+ * possibly incomplete input typical for incremental text parsers.
|
||
|
+ * Unescapes escaped characters in HTTP/1.1 quoted strings.
|
||
|
*
|
||
|
- * \see ::Parser::Tokenizer for more detail
|
||
|
+ * \param http1p0 whether to prohibit \-escaped characters in quoted strings
|
||
|
+ * \throws InsufficientInput as appropriate, including on unterminated tokens
|
||
|
+ * \returns extracted token or quoted string (without quotes)
|
||
|
+ *
|
||
|
+ * Governed by:
|
||
|
+ * - RFC 1945 section 2.1
|
||
|
+ * "
|
||
|
+ * A string of text is parsed as a single word if it is quoted using
|
||
|
+ * double-quote marks.
|
||
|
+ *
|
||
|
+ * quoted-string = ( <"> *(qdtext) <"> )
|
||
|
+ *
|
||
|
+ * qdtext = <any CHAR except <"> and CTLs,
|
||
|
+ * but including LWS>
|
||
|
+ *
|
||
|
+ * Single-character quoting using the backslash ("\") character is not
|
||
|
+ * permitted in HTTP/1.0.
|
||
|
+ * "
|
||
|
+ *
|
||
|
+ * - RFC 7230 section 3.2.6
|
||
|
+ * "
|
||
|
+ * A string of text is parsed as a single value if it is quoted using
|
||
|
+ * double-quote marks.
|
||
|
+ *
|
||
|
+ * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
|
||
|
+ * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
|
||
|
+ * obs-text = %x80-FF
|
||
|
+ * "
|
||
|
*/
|
||
|
-class Tokenizer : public ::Parser::Tokenizer
|
||
|
-{
|
||
|
-public:
|
||
|
- Tokenizer(SBuf &s) : ::Parser::Tokenizer(s), savedStats_(0) {}
|
||
|
-
|
||
|
- /**
|
||
|
- * Attempt to parse a quoted-string lexical construct.
|
||
|
- *
|
||
|
- * Governed by:
|
||
|
- * - RFC 1945 section 2.1
|
||
|
- * "
|
||
|
- * A string of text is parsed as a single word if it is quoted using
|
||
|
- * double-quote marks.
|
||
|
- *
|
||
|
- * quoted-string = ( <"> *(qdtext) <"> )
|
||
|
- *
|
||
|
- * qdtext = <any CHAR except <"> and CTLs,
|
||
|
- * but including LWS>
|
||
|
- *
|
||
|
- * Single-character quoting using the backslash ("\") character is not
|
||
|
- * permitted in HTTP/1.0.
|
||
|
- * "
|
||
|
- *
|
||
|
- * - RFC 7230 section 3.2.6
|
||
|
- * "
|
||
|
- * A string of text is parsed as a single value if it is quoted using
|
||
|
- * double-quote marks.
|
||
|
- *
|
||
|
- * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
|
||
|
- * qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
|
||
|
- * obs-text = %x80-FF
|
||
|
- * "
|
||
|
- *
|
||
|
- * \param escaped HTTP/1.0 does not permit \-escaped characters
|
||
|
- */
|
||
|
- bool quotedString(SBuf &value, const bool http1p0 = false);
|
||
|
-
|
||
|
- /**
|
||
|
- * Attempt to parse a (token / quoted-string ) lexical construct.
|
||
|
- */
|
||
|
- bool quotedStringOrToken(SBuf &value, const bool http1p0 = false);
|
||
|
-
|
||
|
-private:
|
||
|
- /// parse the internal component of a quote-string, and terminal DQUOTE
|
||
|
- bool qdText(SBuf &value, const bool http1p0);
|
||
|
-
|
||
|
- void checkpoint() { savedCheckpoint_ = buf(); savedStats_ = parsedSize(); }
|
||
|
- void restoreLastCheckpoint() { undoParse(savedCheckpoint_, savedStats_); }
|
||
|
-
|
||
|
- SBuf savedCheckpoint_;
|
||
|
- SBuf::size_type savedStats_;
|
||
|
-};
|
||
|
+SBuf tokenOrQuotedString(Parser::Tokenizer &tok, const bool http1p0 = false);
|
||
|
|
||
|
} // namespace One
|
||
|
} // namespace Http
|
||
|
diff --git a/src/http/one/forward.h b/src/http/one/forward.h
|
||
|
index c90dc34..2b4ad28 100644
|
||
|
--- a/src/http/one/forward.h
|
||
|
+++ b/src/http/one/forward.h
|
||
|
@@ -10,6 +10,7 @@
|
||
|
#define SQUID_SRC_HTTP_ONE_FORWARD_H
|
||
|
|
||
|
#include "base/RefCount.h"
|
||
|
+#include "parser/forward.h"
|
||
|
#include "sbuf/forward.h"
|
||
|
|
||
|
namespace Http {
|
||
|
@@ -31,6 +32,8 @@ typedef RefCount<Http::One::ResponseParser> ResponseParserPointer;
|
||
|
/// CRLF textual representation
|
||
|
const SBuf &CrLf();
|
||
|
|
||
|
+using ::Parser::InsufficientInput;
|
||
|
+
|
||
|
} // namespace One
|
||
|
} // namespace Http
|
||
|
|
||
|
diff --git a/src/parser/BinaryTokenizer.h b/src/parser/BinaryTokenizer.h
|
||
|
index acebd4d..24042d4 100644
|
||
|
--- a/src/parser/BinaryTokenizer.h
|
||
|
+++ b/src/parser/BinaryTokenizer.h
|
||
|
@@ -9,6 +9,7 @@
|
||
|
#ifndef SQUID_SRC_PARSER_BINARYTOKENIZER_H
|
||
|
#define SQUID_SRC_PARSER_BINARYTOKENIZER_H
|
||
|
|
||
|
+#include "parser/forward.h"
|
||
|
#include "sbuf/SBuf.h"
|
||
|
|
||
|
namespace Parser
|
||
|
@@ -44,7 +45,7 @@ public:
|
||
|
class BinaryTokenizer
|
||
|
{
|
||
|
public:
|
||
|
- class InsufficientInput {}; // thrown when a method runs out of data
|
||
|
+ typedef ::Parser::InsufficientInput InsufficientInput;
|
||
|
typedef uint64_t size_type; // enough for the largest supported offset
|
||
|
|
||
|
BinaryTokenizer();
|
||
|
diff --git a/src/parser/Makefile.am b/src/parser/Makefile.am
|
||
|
index af2b759..0daa5a8 100644
|
||
|
--- a/src/parser/Makefile.am
|
||
|
+++ b/src/parser/Makefile.am
|
||
|
@@ -13,6 +13,7 @@ noinst_LTLIBRARIES = libparser.la
|
||
|
libparser_la_SOURCES = \
|
||
|
BinaryTokenizer.h \
|
||
|
BinaryTokenizer.cc \
|
||
|
+ forward.h \
|
||
|
Tokenizer.h \
|
||
|
Tokenizer.cc
|
||
|
|
||
|
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
|
||
|
index 7e73e04..68f4aec 100644
|
||
|
--- a/src/parser/Tokenizer.cc
|
||
|
+++ b/src/parser/Tokenizer.cc
|
||
|
@@ -10,7 +10,9 @@
|
||
|
|
||
|
#include "squid.h"
|
||
|
#include "Debug.h"
|
||
|
+#include "parser/forward.h"
|
||
|
#include "parser/Tokenizer.h"
|
||
|
+#include "sbuf/Stream.h"
|
||
|
|
||
|
#include <cerrno>
|
||
|
#if HAVE_CTYPE_H
|
||
|
@@ -96,6 +98,23 @@ Parser::Tokenizer::prefix(SBuf &returnedToken, const CharacterSet &tokenChars, c
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
+SBuf
|
||
|
+Parser::Tokenizer::prefix(const char *description, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
||
|
+{
|
||
|
+ if (atEnd())
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ SBuf result;
|
||
|
+
|
||
|
+ if (!prefix(result, tokenChars, limit))
|
||
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
||
|
+
|
||
|
+ if (atEnd())
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
bool
|
||
|
Parser::Tokenizer::suffix(SBuf &returnedToken, const CharacterSet &tokenChars, const SBuf::size_type limit)
|
||
|
{
|
||
|
@@ -283,3 +302,24 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf:
|
||
|
return success(s - range.rawContent());
|
||
|
}
|
||
|
|
||
|
+int64_t
|
||
|
+Parser::Tokenizer::udec64(const char *description, const SBuf::size_type limit)
|
||
|
+{
|
||
|
+ if (atEnd())
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ int64_t result = 0;
|
||
|
+
|
||
|
+ // Since we only support unsigned decimals, a parsing failure with a
|
||
|
+ // non-empty input always implies invalid/malformed input (or a buggy
|
||
|
+ // limit=0 caller). TODO: Support signed and non-decimal integers by
|
||
|
+ // refactoring int64() to detect insufficient input.
|
||
|
+ if (!int64(result, 10, false, limit))
|
||
|
+ throw TexcHere(ToSBuf("cannot parse ", description));
|
||
|
+
|
||
|
+ if (atEnd())
|
||
|
+ throw InsufficientInput(); // more digits may be coming
|
||
|
+
|
||
|
+ return result;
|
||
|
+}
|
||
|
+
|
||
|
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
|
||
|
index 54414be..03a8388 100644
|
||
|
--- a/src/parser/Tokenizer.h
|
||
|
+++ b/src/parser/Tokenizer.h
|
||
|
@@ -143,6 +143,19 @@ public:
|
||
|
*/
|
||
|
bool int64(int64_t &result, int base = 0, bool allowSign = true, SBuf::size_type limit = SBuf::npos);
|
||
|
|
||
|
+ /*
|
||
|
+ * The methods below mimic their counterparts documented above, but they
|
||
|
+ * throw on errors, including InsufficientInput. The field description
|
||
|
+ * parameter is used for error reporting and debugging.
|
||
|
+ */
|
||
|
+
|
||
|
+ /// prefix() wrapper but throws InsufficientInput if input contains
|
||
|
+ /// nothing but the prefix (i.e. if the prefix is not "terminated")
|
||
|
+ SBuf prefix(const char *description, const CharacterSet &tokenChars, SBuf::size_type limit = SBuf::npos);
|
||
|
+
|
||
|
+ /// int64() wrapper but limited to unsigned decimal integers (for now)
|
||
|
+ int64_t udec64(const char *description, SBuf::size_type limit = SBuf::npos);
|
||
|
+
|
||
|
protected:
|
||
|
SBuf consume(const SBuf::size_type n);
|
||
|
SBuf::size_type success(const SBuf::size_type n);
|
||
|
diff --git a/src/parser/forward.h b/src/parser/forward.h
|
||
|
new file mode 100644
|
||
|
index 0000000..5a95b7a
|
||
|
--- /dev/null
|
||
|
+++ b/src/parser/forward.h
|
||
|
@@ -0,0 +1,22 @@
|
||
|
+/*
|
||
|
+ * Copyright (C) 1996-2019 The Squid Software Foundation and contributors
|
||
|
+ *
|
||
|
+ * Squid software is distributed under GPLv2+ license and includes
|
||
|
+ * contributions from numerous individuals and organizations.
|
||
|
+ * Please see the COPYING and CONTRIBUTORS files for details.
|
||
|
+ */
|
||
|
+
|
||
|
+#ifndef SQUID_PARSER_FORWARD_H
|
||
|
+#define SQUID_PARSER_FORWARD_H
|
||
|
+
|
||
|
+namespace Parser {
|
||
|
+class Tokenizer;
|
||
|
+class BinaryTokenizer;
|
||
|
+
|
||
|
+// TODO: Move this declaration (to parser/Elements.h) if we need more like it.
|
||
|
+/// thrown by modern "incremental" parsers when they need more data
|
||
|
+class InsufficientInput {};
|
||
|
+} // namespace Parser
|
||
|
+
|
||
|
+#endif /* SQUID_PARSER_FORWARD_H */
|
||
|
+
|
||
|
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
|
||
|
index affe0b1..05591fe 100644
|
||
|
--- a/src/http/one/Parser.cc
|
||
|
+++ b/src/http/one/Parser.cc
|
||
|
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
|
||
|
void
|
||
|
Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
|
||
|
{
|
||
|
- if (tok.skip(Http1::CrLf()))
|
||
|
- return;
|
||
|
-
|
||
|
if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
|
||
|
return;
|
||
|
|
||
|
- if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
|
||
|
- throw InsufficientInput();
|
||
|
-
|
||
|
- throw TexcHere("garbage instead of CRLF line terminator");
|
||
|
+ tok.skipRequired("line-terminating CRLF", Http1::CrLf());
|
||
|
}
|
||
|
|
||
|
/// all characters except the LF line terminator
|
||
|
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
|
||
|
index 40e281b..9a2a4ad 100644
|
||
|
--- a/src/http/one/Parser.h
|
||
|
+++ b/src/http/one/Parser.h
|
||
|
@@ -120,9 +120,7 @@ protected:
|
||
|
* detect and skip the CRLF or (if tolerant) LF line terminator
|
||
|
* consume from the tokenizer.
|
||
|
*
|
||
|
- * \throws exception on bad or InsuffientInput.
|
||
|
- * \retval true only if line terminator found.
|
||
|
- * \retval false incomplete or missing line terminator, need more data.
|
||
|
+ * \throws exception on bad or InsufficientInput
|
||
|
*/
|
||
|
void skipLineTerminator(Tokenizer &) const;
|
||
|
|
||
|
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc
|
||
|
index 6d2f8ea..3bff6c7 100644
|
||
|
--- a/src/http/one/TeChunkedParser.cc
|
||
|
+++ b/src/http/one/TeChunkedParser.cc
|
||
|
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
|
||
|
{
|
||
|
Must(theChunkSize <= 0); // Should(), really
|
||
|
|
||
|
+ static const SBuf bannedHexPrefixLower("0x");
|
||
|
+ static const SBuf bannedHexPrefixUpper("0X");
|
||
|
+ if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
|
||
|
+ throw TextException("chunk starts with 0x", Here());
|
||
|
+
|
||
|
int64_t size = -1;
|
||
|
if (tok.int64(size, 16, false) && !tok.atEnd()) {
|
||
|
if (size < 0)
|
||
|
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
|
||
|
// bad or insufficient input, like in the code below. TODO: Expand up.
|
||
|
try {
|
||
|
parseChunkExtensions(tok); // a possibly empty chunk-ext list
|
||
|
- skipLineTerminator(tok);
|
||
|
+ tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
|
||
|
buf_ = tok.remaining();
|
||
|
parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
|
||
|
return true;
|
||
|
@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
|
||
|
// other exceptions bubble up to kill message parsing
|
||
|
}
|
||
|
|
||
|
-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
|
||
|
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1):
|
||
|
/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
void
|
||
|
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
|
||
|
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
|
||
|
{
|
||
|
do {
|
||
|
+ auto tok = callerTok;
|
||
|
+
|
||
|
ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
|
||
|
|
||
|
if (!tok.skip(';'))
|
||
|
@@ -145,6 +152,7 @@ Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
|
||
|
|
||
|
parseOneChunkExtension(tok);
|
||
|
buf_ = tok.remaining(); // got one extension
|
||
|
+ callerTok = tok;
|
||
|
} while (true);
|
||
|
}
|
||
|
|
||
|
@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
|
||
|
/// Parses a single chunk-ext list element:
|
||
|
/// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
|
||
|
void
|
||
|
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
|
||
|
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
|
||
|
{
|
||
|
+ auto tok = callerTok;
|
||
|
+
|
||
|
ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
|
||
|
|
||
|
const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
|
||
|
+ callerTok = tok; // in case we determine that this is a valueless chunk-ext
|
||
|
|
||
|
ParseBws(tok);
|
||
|
|
||
|
@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
|
||
|
customExtensionValueParser->parse(tok, extName);
|
||
|
else
|
||
|
ChunkExtensionValueParser::Ignore(tok, extName);
|
||
|
+
|
||
|
+ callerTok = tok;
|
||
|
}
|
||
|
|
||
|
bool
|
||
|
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
|
||
|
Must(theLeftBodySize == 0); // Should(), really
|
||
|
|
||
|
try {
|
||
|
- skipLineTerminator(tok);
|
||
|
+ tok.skipRequired("chunk CRLF", Http1::CrLf());
|
||
|
buf_ = tok.remaining(); // parse checkpoint
|
||
|
theChunkSize = 0; // done with the current chunk
|
||
|
parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
|
||
|
diff --git a/src/parser/Tokenizer.cc b/src/parser/Tokenizer.cc
|
||
|
index 68f4aec..8516869 100644
|
||
|
--- a/src/parser/Tokenizer.cc
|
||
|
+++ b/src/parser/Tokenizer.cc
|
||
|
@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
|
||
|
return success(prefixLen);
|
||
|
}
|
||
|
|
||
|
+void
|
||
|
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
|
||
|
+{
|
||
|
+ if (skip(tokenToSkip) || tokenToSkip.isEmpty())
|
||
|
+ return;
|
||
|
+
|
||
|
+ if (tokenToSkip.startsWith(buf_))
|
||
|
+ throw InsufficientInput();
|
||
|
+
|
||
|
+ throw TextException(ToSBuf("cannot skip ", description), Here());
|
||
|
+}
|
||
|
+
|
||
|
bool
|
||
|
Parser::Tokenizer::skipOne(const CharacterSet &chars)
|
||
|
{
|
||
|
diff --git a/src/parser/Tokenizer.h b/src/parser/Tokenizer.h
|
||
|
index 03a8388..78ab9e7 100644
|
||
|
--- a/src/parser/Tokenizer.h
|
||
|
+++ b/src/parser/Tokenizer.h
|
||
|
@@ -115,6 +115,13 @@ public:
|
||
|
*/
|
||
|
SBuf::size_type skipAll(const CharacterSet &discardables);
|
||
|
|
||
|
+ /** skips a given character sequence (string);
|
||
|
+ * does nothing if the sequence is empty
|
||
|
+ *
|
||
|
+ * \throws exception on mismatching prefix or InsufficientInput
|
||
|
+ */
|
||
|
+ void skipRequired(const char *description, const SBuf &tokenToSkip);
|
||
|
+
|
||
|
/** Removes a single trailing character from the set.
|
||
|
*
|
||
|
* \return whether a character was removed
|