commit ea12a34d338b962707d5078d6d1fc7c6eb119a22
Author: Alex Rousskov <rousskov@measurement-factory.com>
Date:   2020-05-13 14:05:00 +0000

    Validate Content-Length value prefix (#629)
    
    The new code detects all invalid Content-Length prefixes but the old
    code was already rejecting most invalid prefixes using strtoll(). The
    newly covered (and now rejected) invalid characters are
    
    * explicit "+" sign;
    * explicit "-" sign in "-0" values;
    * isspace(3) characters that are not (relaxed) OWS characters.
    
    In most deployment environments, the last set is probably empty because
    the relaxed OWS set has all the POSIX/C isspace(3) characters except the
    new line, and the new line is unlikely to sneak in past other checks.
    
    Thank you, Amit Klein <amit.klein@safebreach.com>, for elevating the
    importance of this 2016 TODO (added in commit a1b9ec2).

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 36957f2..c10a221 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -25,6 +25,7 @@ Thank you!
     Alex Wu <alex_wu2012@hotmail.com>
     Alin Nastac <mrness@gentoo.org>
     Alter <alter@alter.org.ua>
+    Amit Klein <amit.klein@safebreach.com>
     Amos Jeffries
     Amos Jeffries <amosjeffries@squid-cache.org>
     Amos Jeffries <squid3@treenet.co.nz>
diff --git a/src/http/ContentLengthInterpreter.cc b/src/http/ContentLengthInterpreter.cc
index 3fdf7de..a3741eb 100644
--- a/src/http/ContentLengthInterpreter.cc
+++ b/src/http/ContentLengthInterpreter.cc
@@ -28,6 +28,24 @@ Http::ContentLengthInterpreter::ContentLengthInterpreter(const int aDebugLevel):
 {
 }
 
+/// Scans [prefix, valueEnd) and checks that only allowed whitespace characters precede the Content-Length number.
+/// \returns a pointer to the first digit found, or nil if a disallowed character comes first or the range has no digit
+const char *
+Http::ContentLengthInterpreter::findDigits(const char *prefix, const char * const valueEnd) const
+{
+    // skip leading OWS in RFC 7230's `OWS field-value OWS`; any other non-digit (e.g., a "+" or "-" sign) is rejected
+    const CharacterSet &whitespace = Http::One::Parser::WhitespaceCharacters();
+    while (prefix < valueEnd) {
+        const auto ch = *prefix;
+        if (CharacterSet::DIGIT[ch])
+            return prefix; // common case: a pre-trimmed field value starts with a digit right away
+        if (!whitespace[ch])
+            return nullptr; // (trimmed) length does not start with a digit: neither a digit nor allowed whitespace
+        ++prefix;
+    }
+    return nullptr; // empty or whitespace-only value: reached valueEnd without seeing a digit
+}
+
 /// checks whether all characters after the Content-Length are allowed
 bool
 Http::ContentLengthInterpreter::goodSuffix(const char *suffix, const char * const end) const
@@ -52,10 +70,19 @@ Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int value
 {
     Must(!sawBad);
 
+    const auto valueEnd = rawValue + valueSize;
+
+    const auto digits = findDigits(rawValue, valueEnd);
+    if (!digits) {
+        debugs(55, debugLevel, "WARNING: Leading garbage or empty value in" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
     int64_t latestValue = -1;
     char *suffix = nullptr;
-    // TODO: Handle malformed values with leading signs (e.g., "-0" or "+1").
-    if (!httpHeaderParseOffset(rawValue, &latestValue, &suffix)) {
+
+    if (!httpHeaderParseOffset(digits, &latestValue, &suffix)) {
         debugs(55, DBG_IMPORTANT, "WARNING: Malformed" << Raw("Content-Length", rawValue, valueSize));
         sawBad = true;
         return false;
@@ -68,7 +95,7 @@ Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int value
     }
 
     // check for garbage after the number
-    if (!goodSuffix(suffix, rawValue + valueSize)) {
+    if (!goodSuffix(suffix, valueEnd)) {
         debugs(55, debugLevel, "WARNING: Trailing garbage in" << Raw("Content-Length", rawValue, valueSize));
         sawBad = true;
         return false;
diff --git a/src/http/ContentLengthInterpreter.h b/src/http/ContentLengthInterpreter.h
index ce36e22..f22de91 100644
--- a/src/http/ContentLengthInterpreter.h
+++ b/src/http/ContentLengthInterpreter.h
@@ -46,6 +46,7 @@ public:
     bool sawGood;
 
 protected:
+    const char *findDigits(const char *prefix, const char *valueEnd) const;
     bool goodSuffix(const char *suffix, const char * const end) const;
     bool checkValue(const char *start, const int size);
     bool checkList(const String &list);