From 8efa99e7b5d5a37aefb476cc27ee24c2be4da0c7 Mon Sep 17 00:00:00 2001 From: Michael Saboff Date: Mon, 22 May 2023 13:40:46 -0700 Subject: [PATCH] Cherry-pick 264365@main (698c6e293734). https://bugs.webkit.org/show_bug.cgi?id=254930 [JSC] RegExpGlobalData::performMatch issue leading to OOB read https://bugs.webkit.org/show_bug.cgi?id=254930 rdar://107436732 Reviewed by Alexey Shvayka. Fixed two issues: 1) In YarrInterpreter.cpp::matchAssertionBOL() we were advancing the string position for non-BMP characters. Since it is an assertion, we shouldn't advance the character position. Made the same fix to matchAssertionEOL(). 2) In StringPrototype.cpp::replaceUsingRegExpSearch(), we need to advance past both elements of a non-BMP character for the case where the RegExp match is empty. * JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js: New test. * Source/JavaScriptCore/runtime/StringPrototype.cpp: (JSC::replaceUsingRegExpSearch): * Source/JavaScriptCore/yarr/YarrInterpreter.cpp: (JSC::Yarr::Interpreter::InputStream::readCheckedDontAdvance): (JSC::Yarr::Interpreter::matchAssertionBOL): (JSC::Yarr::Interpreter::matchAssertionEOL): Originally-landed-as: 259548.551@safari-7615-branch (e34edaa74575). rdar://107436732 Canonical link: https://commits.webkit.org/264365@main --- ...place-regexp-matchBOL-correct-advancing.js | 35 ++++++++++++++++++ .../runtime/StringPrototype.cpp | 10 ++++++ .../JavaScriptCore/yarr/YarrInterpreter.cpp | 36 +++++++++++++++++-- 3 files changed, 79 insertions(+), 2 deletions(-) create mode 100644 JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js diff --git a/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js new file mode 100644 index 000000000000..25b1a70b81d2 --- /dev/null +++ b/JSTests/stress/string-replace-regexp-matchBOL-correct-advancing.js @@ -0,0 +1,35 @@ +// Check that we don't advance for BOL assertions when matching a non-BMP character in the YARR interpreter +// and that we do advance in String.replace() when processing an empty match. + +let expected = "|"; + +for (let i = 0; i < 11; ++i) + expected += String.fromCodePoint(128512) + '|'; + +let str = String.fromCodePoint(128512).repeat(11); + +let result1 = str.replace(/(?!(?=^a|()+()+x)(abc))/gmu, r => { + return '|'; +}); + + +if (result1 !== expected) + print("FAILED: \"" + result1 + " !== " + expected + '"'); + +let result2= str.replace(/(?!(?=^a|x)(abc))/gmu, r => { + return '|'; +}); + +if (result2 !== expected) + print("FAILED: \"" + result2 + " !== " + expected + '"'); + +expected = "|" + String.fromCodePoint(128512); + +str = String.fromCodePoint(128512).repeat(1); + +let result3= str.replace(/(?!(?=^a|x)(abc))/mu, r => { + return '|'; +}); + +if (result3 !== expected) + print("FAILED: \"" + result3 + " !== " + expected + '"'); diff --git a/Source/JavaScriptCore/runtime/StringPrototype.cpp b/Source/JavaScriptCore/runtime/StringPrototype.cpp index 08104b1dbfa9..459295f728a7 100644 --- a/Source/JavaScriptCore/runtime/StringPrototype.cpp +++ b/Source/JavaScriptCore/runtime/StringPrototype.cpp @@ -603,6 +603,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch( startPosition++; if (startPosition > sourceLen) break; + if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) { + startPosition++; + if (startPosition > sourceLen) + break; + } } } } else { @@ -682,6 +687,11 @@ static ALWAYS_INLINE JSString* replaceUsingRegExpSearch( startPosition++; if (startPosition > sourceLen) break; + if (U16_IS_LEAD(source[startPosition - 1]) && U16_IS_TRAIL(source[startPosition])) { + startPosition++; + if (startPosition > sourceLen) + break; + } } } while (global); } diff --git a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp index 95a848a1a66d..b1a22b253866 100644 --- a/Source/JavaScriptCore/yarr/YarrInterpreter.cpp +++ b/Source/JavaScriptCore/yarr/YarrInterpreter.cpp @@ -209,6 +209,38 @@ public: } return result; } + + int readCheckedDontAdvance(unsigned negativePositionOffest) + { + RELEASE_ASSERT(pos >= negativePositionOffest); + unsigned p = pos - negativePositionOffest; + ASSERT(p < length); + int result = input[p]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) { + if (atEnd()) + return -1; + + result = U16_GET_SUPPLEMENTARY(result, input[p + 1]); + } + return result; + } + + // readForCharacterDump() is only for use by the DUMP_CURR_CHAR macro. + // We don't want any side effects like the next() in readChecked() above. + int readForCharacterDump(unsigned negativePositionOffest) + { + RELEASE_ASSERT(pos >= negativePositionOffest); + unsigned p = pos - negativePositionOffest; + ASSERT(p < length); + int result = input[p]; + if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + 1 < length && U16_IS_TRAIL(input[p + 1])) { + if (atEnd()) + return -1; + + result = U16_GET_SUPPLEMENTARY(result, input[p + 1]); + } + return result; + } int readSurrogatePairChecked(unsigned negativePositionOffset) { @@ -482,13 +514,13 @@ public: bool matchAssertionBOL(ByteTerm& term) { - return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); + return (input.atStart(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition + 1))); } bool matchAssertionEOL(ByteTerm& term) { if (term.inputPosition) - return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); + return (input.atEnd(term.inputPosition)) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.readCheckedDontAdvance(term.inputPosition))); return (input.atEnd()) || (pattern->multiline() && testCharacterClass(pattern->newlineCharacterClass, input.read())); } -- 2.40.1