From 070c6886f284ab52132bcddd4b6b15828695ac43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Thu, 1 Feb 2018 15:47:56 +0100 Subject: [PATCH] Fix auto-possessification at the end of a capturing group that is called recursively --- ...ification-bug-at-the-end-of-a-captur.patch | 394 ++++++++++++++++++ pcre2.spec | 10 +- 2 files changed, 403 insertions(+), 1 deletion(-) create mode 100644 pcre2-10.31-RC1-Fix-auto-possessification-bug-at-the-end-of-a-captur.patch diff --git a/pcre2-10.31-RC1-Fix-auto-possessification-bug-at-the-end-of-a-captur.patch b/pcre2-10.31-RC1-Fix-auto-possessification-bug-at-the-end-of-a-captur.patch new file mode 100644 index 0000000..6b34cbf --- /dev/null +++ b/pcre2-10.31-RC1-Fix-auto-possessification-bug-at-the-end-of-a-captur.patch @@ -0,0 +1,394 @@ +From b70b8394e79360f92c87a169c1ba982a7e22d9d6 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Wed, 31 Jan 2018 17:53:56 +0000 +Subject: [PATCH] Fix auto-possessification bug at the end of a capturing group + that is called recursively. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@912 6239d852-aaf2-0410-a92c-79f79f948069 + +Petr Písař: Ported to 10.31-RC1. + + src/pcre2_auto_possess.c | 56 ++++++++++++++++++------ + testdata/testinput1 | 30 +++++++++++++ + testdata/testinput2 | 17 ++++++++ + testdata/testoutput1 | 64 ++++++++++++++++++++++++++++ + testdata/testoutput2 | 109 ++++++++++++++++++++++++++++++++++++++++++++++- + +diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c +index ad3543f..23275a2 100644 +--- a/src/pcre2_auto_possess.c ++++ b/src/pcre2_auto_possess.c +@@ -558,47 +558,73 @@ for(;;) + continue; + } + ++ /* At the end of a branch, skip to the end of the group. */ ++ + if (c == OP_ALT) + { + do code += GET(code, 1); while (*code == OP_ALT); + c = *code; + } + ++ /* Inspect the next opcode. 
*/ ++ + switch(c) + { +- case OP_END: +- case OP_KETRPOS: +- /* TRUE only in greedy case. The non-greedy case could be replaced by +- an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT +- uses more memory, which we cannot get at this stage.) */ ++ /* We can always possessify a greedy iterator at the end of the pattern, ++ which is reached after skipping over the final OP_KET. A non-greedy ++ iterator must never be possessified. */ + ++ case OP_END: + return base_list[1] != 0; + ++ /* When an iterator is at the end of certain kinds of group we can inspect ++ what follows the group by skipping over the closing ket. Note that this ++ does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given ++ iteration is variable (could be another iteration or could be the next ++ item). As these two opcodes are not listed in the next switch, they will ++ end up as the next code to inspect, and return FALSE by virtue of being ++ unsupported. */ ++ + case OP_KET: +- /* If the bracket is capturing, and referenced by an OP_RECURSE, or +- it is an atomic sub-pattern (assert, once, etc.) the non-greedy case +- cannot be converted to a possessive form. */ ++ case OP_KETRPOS: ++ /* The non-greedy case cannot be converted to a possessive form. */ + + if (base_list[1] == 0) return FALSE; + ++ /* If the bracket is capturing it might be referenced by an OP_RECURSE ++ so its last iterator can never be possessified if the pattern contains ++ recursions. (This could be improved by keeping a list of group numbers that ++ are called by recursion.) */ ++ + switch(*(code - GET(code, 1))) + { ++ case OP_CBRA: ++ case OP_SCBRA: ++ case OP_CBRAPOS: ++ case OP_SCBRAPOS: ++ if (cb->had_recurse) return FALSE; ++ break; ++ ++ /* Atomic sub-patterns and assertions can always auto-possessify their ++ last iterator. However, if the group was entered as a result of checking ++ a previous iterator, this is not possible. 
*/ ++ case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + case OP_ONCE: + +- /* Atomic sub-patterns and assertions can always auto-possessify their +- last iterator. However, if the group was entered as a result of checking +- a previous iterator, this is not possible. */ +- + return !entered_a_group; + } + ++ /* Skip over the bracket and inspect what comes next. */ ++ + code += PRIV(OP_lengths)[c]; + continue; + ++ /* Handle cases where the next item is a group. */ ++ + case OP_ONCE: + case OP_BRA: + case OP_CBRA: +@@ -637,11 +663,15 @@ for(;;) + code += PRIV(OP_lengths)[c]; + continue; + ++ /* The next opcode does not need special handling; fall through and use it ++ to see if the base can be possessified. */ ++ + default: + break; + } + +- /* Check for a supported opcode, and load its properties. */ ++ /* We now have the next appropriate opcode to compare with the base. Check ++ for a supported opcode, and load its properties. */ + + code = get_chr_property_list(code, utf, cb->fcc, list); + if (code == NULL) return FALSE; /* Unsupported */ +diff --git a/testdata/testinput1 b/testdata/testinput1 +index 756b4af..9a9c5fd 100644 +--- a/testdata/testinput1 ++++ b/testdata/testinput1 +@@ -6159,4 +6159,34 @@ ef) x/x,mark + /((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + ++/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/ ++ aa ++ a ++ ++/^(a?)b(?1)a/ ++ abaa ++ aba ++ baa ++ ba ++ ++/^(a?)+b(?1)a/ ++ abaa ++ aba ++ baa ++ ba ++ ++/^(a?)++b(?1)a/ ++ abaa ++ aba ++ baa ++ ba ++ ++/^(a?)+b/ ++ b ++ ab ++ aaab ++ ++/(?=a+)a(a+)++b/ ++ aab ++ + # End of testinput1 +diff --git a/testdata/testinput2 b/testdata/testinput2 +index 2c2334c..5d3a80e 100644 +--- a/testdata/testinput2 ++++ b/testdata/testinput2 +@@ -5412,4 +5412,21 @@ a)"xI + \= Expect no match + \na + ++# These tests are matched in test 1 as they are Perl compatible. Here we are ++# looking at what does and does not get auto-possessified. 
++ ++/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B ++ ++/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B ++ ++/^(a?)b(?1)a/B ++ ++/^(a?)+b(?1)a/B ++ ++/^(a?)++b(?1)a/B ++ ++/^(a?)+b/B ++ ++/(?=a+)a(a+)++b/B ++ + # End of testinput2 +diff --git a/testdata/testoutput1 b/testdata/testoutput1 +index 7cf647e..9c55be9 100644 +--- a/testdata/testoutput1 ++++ b/testdata/testoutput1 +@@ -9758,4 +9758,68 @@ No match + 1: Y + 2: + ++/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/ ++ aa ++ 0: aa ++ a ++ 0: a ++ ++/^(a?)b(?1)a/ ++ abaa ++ 0: abaa ++ 1: a ++ aba ++ 0: aba ++ 1: a ++ baa ++ 0: baa ++ 1: ++ ba ++ 0: ba ++ 1: ++ ++/^(a?)+b(?1)a/ ++ abaa ++ 0: abaa ++ 1: ++ aba ++ 0: aba ++ 1: ++ baa ++ 0: baa ++ 1: ++ ba ++ 0: ba ++ 1: ++ ++/^(a?)++b(?1)a/ ++ abaa ++ 0: abaa ++ 1: ++ aba ++ 0: aba ++ 1: ++ baa ++ 0: baa ++ 1: ++ ba ++ 0: ba ++ 1: ++ ++/^(a?)+b/ ++ b ++ 0: b ++ 1: ++ ab ++ 0: ab ++ 1: ++ aaab ++ 0: aaab ++ 1: ++ ++/(?=a+)a(a+)++b/ ++ aab ++ 0: aab ++ 1: a ++ + # End of testinput1 +diff --git a/testdata/testoutput2 b/testdata/testoutput2 +index f783320..fcaac8f 100644 +--- a/testdata/testoutput2 ++++ b/testdata/testoutput2 +@@ -12701,7 +12701,7 @@ Subject length lower bound = 5 + Ket + a + CBraPos 1 +- a++ ++ a+ + KetRpos + a + Ket +@@ -16468,6 +16468,113 @@ No match + \na + No match + ++# These tests are matched in test 1 as they are Perl compatible. Here we are ++# looking at what does and does not get auto-possessified. ++ ++/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B ++------------------------------------------------------------------ ++ Bra ++ Cond ++ Cond false ++ CBra 1 ++ a? ++ Ket ++ Ket ++ ^ ++ Recurse ++ a ++ $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B ++------------------------------------------------------------------ ++ Bra ++ Cond ++ Cond false ++ CBra 1 ++ a? 
++ Ket ++ X ++ Ket ++ ^ ++ Recurse ++ a ++ $ ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/^(a?)b(?1)a/B ++------------------------------------------------------------------ ++ Bra ++ ^ ++ CBra 1 ++ a? ++ Ket ++ b ++ Recurse ++ a ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/^(a?)+b(?1)a/B ++------------------------------------------------------------------ ++ Bra ++ ^ ++ SCBra 1 ++ a? ++ KetRmax ++ b ++ Recurse ++ a ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/^(a?)++b(?1)a/B ++------------------------------------------------------------------ ++ Bra ++ ^ ++ SCBraPos 1 ++ a? ++ KetRpos ++ b ++ Recurse ++ a ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/^(a?)+b/B ++------------------------------------------------------------------ ++ Bra ++ ^ ++ SCBra 1 ++ a? ++ KetRmax ++ b ++ Ket ++ End ++------------------------------------------------------------------ ++ ++/(?=a+)a(a+)++b/B ++------------------------------------------------------------------ ++ Bra ++ Assert ++ a++ ++ Ket ++ a ++ CBraPos 1 ++ a++ ++ KetRpos ++ b ++ Ket ++ End ++------------------------------------------------------------------ ++ + # End of testinput2 + Error -65: PCRE2_ERROR_BADDATA (unknown error number) + Error -62: bad serialized data +-- +2.13.6 + diff --git a/pcre2.spec b/pcre2.spec index 122fbed..45b29ba 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -9,7 +9,7 @@ %global rcversion RC1 Name: pcre2 Version: 10.31 -Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}3%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -51,6 +51,9 @@ Patch0: pcre2-10.10-Fix-multilib.patch # Enlarge ovector array match data structure to be large enough in all cases, # in upstream after 
10.31-RC1, oss-fuzz #5415 Patch1: pcre2-10.31-RC1-Increment-dummy-ovector-size-in-internal-structures-.patch +# Fix auto-possessification at the end of a capturing group that is called +# recursively, in upstream after 10.31-RC1, upstream bug #2232 +Patch2: pcre2-10.31-RC1-Fix-auto-possessification-bug-at-the-end-of-a-captur.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -126,6 +129,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %setup -q -n %{name}-%{myversion} %patch0 -p1 %patch1 -p1 +%patch2 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -233,6 +237,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Thu Feb 01 2018 Petr Pisar - 10.31-0.3.RC1 +- Fix auto-possessification at the end of a capturing group that is called + recursively (upstream bug #2232) + * Tue Jan 30 2018 Petr Pisar - 10.31-0.2.RC1 - Enlarge ovector array match data structure to be large enough in all cases (oss-fuzz #5415)