10.34-RC1 bump

This commit is contained in:
Petr Písař 2019-10-30 09:45:12 +01:00
parent 96360c9835
commit db0a3cc7dd
22 changed files with 10 additions and 2269 deletions

2
.gitignore vendored
View File

@ -16,3 +16,5 @@
/pcre2-10.33-RC1.tar.bz2
/pcre2-10.33.tar.bz2
/pcre2-10.33.tar.bz2.sig
/pcre2-10.34-RC1.tar.bz2
/pcre2-10.34-RC1.tar.bz2.sig

View File

@ -1,39 +0,0 @@
From bc7fb8964ca3a422f472189b0eff751c1cc377b0 Mon Sep 17 00:00:00 2001
From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 9 Sep 2019 07:12:00 +0000
Subject: [PATCH] Add underflow check in JIT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1167 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
src/pcre2_jit_compile.c | 8 ++++++--
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 8cbd8f9..79a27fd 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -5793,12 +5793,16 @@ if (common->match_end_ptr != 0)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
}
else
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
+ }
SLJIT_ASSERT(range_right >= 0);
--
2.21.0

View File

@ -1,54 +0,0 @@
From bcf39c1828399ebc33fb92c4edaf2bdd5f891a58 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Fri, 5 Jul 2019 15:49:37 +0000
Subject: [PATCH] Additional overflow test.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1127 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
testdata/testinput2 | 4 ++++
testdata/testoutput2 | 5 +++++
2 files changed, 9 insertions(+)
diff --git a/testdata/testinput2 b/testdata/testinput2
index 079d6d8..9412bf6 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5591,6 +5591,10 @@ a)"xI
/\[()]{65535}(?<A>)/expand
+# Addition overflow
/( {32742} {42})(?<!\1{65481})/
+# Multiplication overflow
+/(X{65535})(?<=\1{32770})/
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index bfe61a3..950095f 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16940,9 +16940,14 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
/\[()]{65535}(?<A>)/expand
Failed: error 197 at offset 131075: too many capturing groups (maximum 65535)
+# Addition overflow
/( {32742} {42})(?<!\1{65481})/
Failed: error 187 at offset 15: lookbehind assertion is too long
+# Multiplication overflow
+/(X{65535})(?<=\1{32770})/
+Failed: error 187 at offset 10: lookbehind assertion is too long
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,108 +0,0 @@
From cdefe642dc2e6b5b8e6703773934813f317bc488 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Thu, 4 Jul 2019 17:01:53 +0000
Subject: [PATCH] Check for integer overflow when computing lookbehind lengths.
Fixes Clusterfuzz issue 13656.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1126 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 38 ++++++++++++++++++++++++++++----------
testdata/testinput2 | 2 ++
testdata/testoutput2 | 3 +++
3 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index c82c6ca..f6e0a0b 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -9197,8 +9197,26 @@ for (;; pptr++)
case META_MINMAX_QUERY:
if (pptr[1] == pptr[2])
{
- if (pptr[1] == 0) branchlength -= lastitemlength;
- else itemlength = (pptr[1] - 1) * lastitemlength;
+ switch(pptr[1])
+ {
+ case 0:
+ branchlength -= lastitemlength;
+ break;
+
+ case 1:
+ itemlength = 0;
+ break;
+
+ default: /* Check for integer overflow */
+ if (lastitemlength != 0 && /* Should not occur, but just in case */
+ INT_MAX/lastitemlength < pptr[1] - 1)
+ {
+ *errcodeptr = ERR87; /* Integer overflow; lookbehind too big */
+ return -1;
+ }
+ itemlength = (pptr[1] - 1) * lastitemlength;
+ break;
+ }
pptr += 2;
break;
}
@@ -9212,19 +9230,19 @@ for (;; pptr++)
return -1;
}
- /* Add the item length to the branchlength, and save it for use if the next
- thing is a quantifier. */
-
- branchlength += itemlength;
- lastitemlength = itemlength;
-
- /* Ensure that the length does not overflow the limit. */
+ /* Add the item length to the branchlength, checking for integer overflow and
+ for the branch length exceeding the limit. */
- if (branchlength > LOOKBEHIND_MAX)
+ if (INT_MAX - branchlength < (int)itemlength ||
+ (branchlength += itemlength) > LOOKBEHIND_MAX)
{
*errcodeptr = ERR87;
return -1;
}
+
+ /* Save this item length for use if the next item is a quantifier. */
+
+ lastitemlength = itemlength;
}
EXIT:
diff --git a/testdata/testinput2 b/testdata/testinput2
index 8a98f94..079d6d8 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5591,4 +5591,6 @@ a)"xI
/\[()]{65535}(?<A>)/expand
+/( {32742} {42})(?<!\1{65481})/
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 158fbad..bfe61a3 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16940,6 +16940,9 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
/\[()]{65535}(?<A>)/expand
Failed: error 197 at offset 131075: too many capturing groups (maximum 65535)
+/( {32742} {42})(?<!\1{65481})/
+Failed: error 187 at offset 15: lookbehind assertion is too long
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,160 +0,0 @@
From 76d59bdbc2d30bad1d11e0490b767058dc33d39c Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Wed, 19 Jun 2019 16:27:50 +0000
Subject: [PATCH] Don't ignore {1}+ when it is applied to a parenthesized item.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1111 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 29 +++++++++++++++++++----------
testdata/testinput1 | 14 ++++++++++++++
testdata/testoutput1 | 18 ++++++++++++++++++
3 files changed, 51 insertions(+), 10 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index cd6fbea..c82c6ca 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -6723,10 +6723,6 @@ for (;; pptr++)
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
op_type = 0;
- /* If the repeat is {1} we can ignore it. */
-
- if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
-
/* Adjust first and required code units for a zero repeat. */
if (repeat_min == 0)
@@ -6769,7 +6765,10 @@ for (;; pptr++)
tempcode = previous;
op_previous = *previous;
- /* Now handle repetition for the different types of item. */
+ /* Now handle repetition for the different types of item. If the repeat
+ minimum and the repeat maximum are both 1, we can ignore the quantifier for
+ non-parenthesized items, as they have only one alternative. For anything in
+ parentheses, we must not ignore if {1} is possessive. */
switch (op_previous)
{
@@ -6783,6 +6782,7 @@ for (;; pptr++)
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
op_type = chartypeoffset[op_previous - OP_CHAR];
/* Deal with UTF characters that take up more than one code unit. */
@@ -6829,6 +6829,7 @@ for (;; pptr++)
code = previous;
goto END_REPEAT;
}
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
*code++ = OP_CRSTAR + repeat_type;
@@ -6863,6 +6864,8 @@ for (;; pptr++)
repetition. */
case OP_RECURSE:
+ if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
+ goto END_REPEAT;
/* Generate unwrapped repeats for a non-zero minimum, except when the
minimum is 1 and the maximum unlimited, because that can be handled with
@@ -6945,6 +6948,9 @@ for (;; pptr++)
PCRE2_UCHAR *bralink = NULL;
PCRE2_UCHAR *brazeroptr = NULL;
+ if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
+ goto END_REPEAT;
+
/* Repeating a DEFINE group (or any group where the condition is always
FALSE and there is only one branch) is pointless, but Perl allows the
syntax, so we just ignore the repeat. */
@@ -7161,11 +7167,12 @@ for (;; pptr++)
and SCRIPT_RUN groups at runtime, but in a different way.]
Then, if the quantifier was possessive and the bracket is not a
- conditional, we convert the BRA code to the POS form, and the KET code to
- KETRPOS. (It turns out to be convenient at runtime to detect this kind of
- subpattern at both the start and at the end.) The use of special opcodes
- makes it possible to reduce greatly the stack usage in pcre2_match(). If
- the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
+ conditional, we convert the BRA code to the POS form, and the KET code
+ to KETRPOS. (It turns out to be convenient at runtime to detect this
+ kind of subpattern at both the start and at the end.) The use of
+ special opcodes makes it possible to reduce greatly the stack usage in
+ pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to
+ OP_BRAPOSZERO.
Then, if the minimum number of matches is 1 or 0, cancel the possessive
flag so that the default action below, of wrapping everything inside
@@ -7266,6 +7273,8 @@ for (;; pptr++)
int prop_type, prop_value;
PCRE2_UCHAR *oldcode;
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
+
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
mclength = 0; /* Not a character */
diff --git a/testdata/testinput1 b/testdata/testinput1
index 7b6918a..4d9ec5a 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6351,4 +6351,18 @@ ef) x/x,mark
acb
abc
+/(?:a|ab){1}+c/
+\= Expect no match
+ abc
+
+/(a|ab){1}+c/
+ abc
+
+/(a+){1}+a/
+\= Expect no match
+ aaaa
+
+/(?(DEFINE)(a|ab))(?1){1}+c/
+ abc
+
# End of testinput1
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index d9f8c3b..fffb8ec 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10063,4 +10063,22 @@ MK: 2
0: a
MK: 2
+/(?:a|ab){1}+c/
+\= Expect no match
+ abc
+No match
+
+/(a|ab){1}+c/
+ abc
+No match
+
+/(a+){1}+a/
+\= Expect no match
+ aaaa
+No match
+
+/(?(DEFINE)(a|ab))(?1){1}+c/
+ abc
+No match
+
# End of testinput1
--
2.20.1

View File

@ -1,37 +0,0 @@
From 9835bbc7fbb3423163dc49e7d822dad2b135e192 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 15 Oct 2019 10:46:36 +0000
Subject: [PATCH] Ensure regexec is thread safe to avoid sanitizer warnings.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1176 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
src/pcre2posix.c | 3 +--
diff --git a/src/pcre2posix.c b/src/pcre2posix.c
index 34a8d80..b24620a 100644
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@@ -323,6 +323,7 @@ if (preg->re_pcre2_code == NULL)
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
+preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
if (preg->re_match_data == NULL)
{
@@ -356,8 +357,6 @@ if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
-((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
-
/* When REG_NOSUB was specified, or if no vector has been passed in which to
put captured strings, ensure that nmatch is zero. This will stop any attempt to
write to pmatch. */
--
2.21.0

View File

@ -1,89 +0,0 @@
From ec098f6b898334be0674dbadc9fd67a0532fa0eb Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 10 Aug 2019 11:34:50 +0000
Subject: [PATCH] Fix allusedtext bug, rightmost consulted character incorrect
in negative lookaheads.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1157 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
src/pcre2_match.c | 1 +
testdata/testinput15 | 4 ++++
testdata/testinput2 | 2 +-
testdata/testoutput15 | 8 ++++++++
testdata/testoutput2 | 2 +-
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 26fc01e..4471183 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -5971,6 +5971,7 @@ in rrc. */
#define LBL(val) case val: goto L_RM##val;
RETURN_SWITCH:
+if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
if (Frdepth == 0) return rrc; /* Exit from the top level */
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
diff --git a/testdata/testinput15 b/testdata/testinput15
index 2cb712d..5dd6897 100644
--- a/testdata/testinput15
+++ b/testdata/testinput15
@@ -231,4 +231,8 @@
/(*LIMIT_HEAP=21)\[(a)]{60}/expand
\[a]{60}
+/b(?<!ax)(?!cx)/allusedtext
+ abc
+ abcz
+
# End of testinput15
diff --git a/testdata/testinput2 b/testdata/testinput2
index 4377f80..7b44fb0 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4584,7 +4584,7 @@ B)x/alt_verbnames,mark
/abcd/null_context
abcd\=null_context
-\= Expect error
+\= Expect error - not allowed together
abcd\=null_context,find_limits
abcd\=allusedtext,startchar
diff --git a/testdata/testoutput15 b/testdata/testoutput15
index c51cda7..d854412 100644
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@@ -525,4 +525,12 @@ No match
\[a]{60}
Failed: error -63: heap limit exceeded
+/b(?<!ax)(?!cx)/allusedtext
+ abc
+ 0: abc
+ < >
+ abcz
+ 0: abcz
+ < >>
+
# End of testinput15
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 0fd2187..0803d9e 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14803,7 +14803,7 @@ No match
/abcd/null_context
abcd\=null_context
0: abcd
-\= Expect error
+\= Expect error - not allowed together
abcd\=null_context,find_limits
** Not allowed together: find_limits null_context
abcd\=allusedtext,startchar
--
2.21.0

View File

@ -1,114 +0,0 @@
From 007b635b6788f8317747842b02f9c85137277c20 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Thu, 18 Jul 2019 17:20:29 +0000
Subject: [PATCH] Fix bug in recent patch for lookbehinds within lookaheads.
Fixes ClusterFuzz 15933.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1138 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 22 +++++++++++++---------
testdata/testinput2 | 3 +++
testdata/testoutput2 | 4 ++++
3 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 2ae95ed..b68c154 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -136,7 +136,8 @@ static BOOL
compile_block *);
static int
- check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
+ check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
+ compile_block *);
/*************************************************
@@ -9004,7 +9005,7 @@ for (;; pptr++)
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
- *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
if (*errcodeptr != 0) return -1;
/* Ignore any qualifiers that follow a lookahead assertion. */
@@ -9326,15 +9327,17 @@ order to process any lookbehinds that they may contain. It stops when it hits a
non-nested closing parenthesis in this case, returning a pointer to it.
Arguments
- pptr points to where to start (start of pattern or start of lookahead)
- retptr if not NULL, return the ket pointer here
- cb points to the compile block
+ pptr points to where to start (start of pattern or start of lookahead)
+ retptr if not NULL, return the ket pointer here
+ recurses chain of recurse_check to catch mutual recursion
+ cb points to the compile block
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/
static int
-check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
+ parsed_recurse_check *recurses, compile_block *cb)
{
int errorcode = 0;
int loopcount = 0;
@@ -9449,7 +9452,8 @@ for (; *pptr != META_END; pptr++)
case META_LOOKBEHIND:
case META_LOOKBEHINDNOT:
- if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb))
+ if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount,
+ recurses, cb))
return errorcode;
break;
}
@@ -9899,7 +9903,7 @@ lengths. */
if (has_lookbehind)
{
- errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
if (errorcode != 0) goto HAD_CB_ERROR;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index d85fc5f..1bfe591 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5600,4 +5600,7 @@ a)"xI
/(?<=(?=.(?<=x)))/
ab\=ph
+# Expect error (recursion => not fixed length)
+/(\2)((?=(?<=\1)))/
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 6405e26..758b4db 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16952,6 +16952,10 @@ Failed: error 187 at offset 10: lookbehind assertion is too long
ab\=ph
No match
+# Expect error (recursion => not fixed length)
+/(\2)((?=(?<=\1)))/
+Failed: error 125 at offset 8: lookbehind assertion is not fixed length
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,210 +0,0 @@
From 8a5ce4c055808fd3a19b0da15e0e3caeb0ac3abb Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 26 Aug 2019 16:28:26 +0000
Subject: [PATCH] Fix bug introduced in commit 1133. Lookbehinds that follow a
condition were not always properly handled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1160 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 12 +++++-
testdata/testinput1 | 3 ++
testdata/testinput2 | 10 +++++
testdata/testoutput1 | 6 +++
testdata/testoutput2 | 93 ++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 122 insertions(+), 2 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index b68c154..5cae730 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -9411,13 +9411,22 @@ for (; *pptr != META_END; pptr++)
break;
case META_BACKREF_BYNAME:
+ case META_RECURSE_BYNAME:
+ pptr += 1 + SIZEOFFSET;
+ break;
+
case META_COND_DEFINE:
case META_COND_NAME:
case META_COND_NUMBER:
case META_COND_RNAME:
case META_COND_RNUMBER:
- case META_RECURSE_BYNAME:
pptr += 1 + SIZEOFFSET;
+ nestlevel++;
+ break;
+
+ case META_COND_VERSION:
+ pptr += 3;
+ nestlevel++;
break;
case META_CALLOUT_STRING:
@@ -9438,7 +9447,6 @@ for (; *pptr != META_END; pptr++)
break;
case META_CALLOUT_NUMBER:
- case META_COND_VERSION:
pptr += 3;
break;
diff --git a/testdata/testinput1 b/testdata/testinput1
index ee9354b..db1706b 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6371,4 +6371,7 @@ ef) x/x,mark
/(?<=(?=(?<=a)))b/
ab
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
+ abc
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 2b1aced..a5f59b9 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5611,4 +5611,14 @@ a)"xI
/\A\s*((?:[^`]{28500}){4}|a)/I
a
+/(?<A>a)(?(<A>)b)((?<=b).*)/B
+
+/(?(1)b)((?<=b).*)/B
+
+/(?(R1)b)((?<=b).*)/B
+
+/(?(DEFINE)b)((?<=b).*)/B
+
+/(?(VERSION=10.4)b)((?<=b).*)/B
+
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index c9bfea8..3f872e7 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10090,4 +10090,10 @@ No match
ab
0: b
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
+ abc
+ 0: abc
+ 1: a
+ 2: c
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index d2415c3..6e41ed8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16983,6 +16983,99 @@ Subject length lower bound = 1
0: a
1: a
+/(?<A>a)(?(<A>)b)((?<=b).*)/B
+------------------------------------------------------------------
+ Bra
+ CBra 1
+ a
+ Ket
+ Cond
+ 1 Cond ref
+ b
+ Ket
+ CBra 2
+ AssertB
+ Reverse
+ b
+ Ket
+ Any*+
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?(1)b)((?<=b).*)/B
+------------------------------------------------------------------
+ Bra
+ Cond
+ 1 Cond ref
+ b
+ Ket
+ CBra 1
+ AssertB
+ Reverse
+ b
+ Ket
+ Any*+
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?(R1)b)((?<=b).*)/B
+------------------------------------------------------------------
+ Bra
+ Cond
+ Cond recurse 1
+ b
+ Ket
+ CBra 1
+ AssertB
+ Reverse
+ b
+ Ket
+ Any*+
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?(DEFINE)b)((?<=b).*)/B
+------------------------------------------------------------------
+ Bra
+ Cond
+ Cond false
+ b
+ Ket
+ CBra 1
+ AssertB
+ Reverse
+ b
+ Ket
+ Any*+
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
+/(?(VERSION=10.4)b)((?<=b).*)/B
+------------------------------------------------------------------
+ Bra
+ Cond
+ Cond false
+ b
+ Ket
+ CBra 1
+ AssertB
+ Reverse
+ b
+ Ket
+ Any*+
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.21.0

View File

@ -1,42 +0,0 @@
From 6809752eacde104d45c5e11c3c64165857200ce7 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 13 May 2019 16:26:17 +0000
Subject: [PATCH 1/2] Fix crash when \X is used without UTF in JIT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1091 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
testdata/testinput4 | 3 +++
testdata/testoutput4 | 4 ++++
diff --git a/testdata/testinput4 b/testdata/testinput4
index cccab0e..f3d498c 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2480,4 +2480,7 @@
/^(?'אABC'...)(?&אABC)/utf
123123123456
+/\X*/
+ \xF3aaa\xE4\xEA\xEB\xFEa
+
# End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 84b8b9e..53926ed 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -4012,4 +4012,8 @@ No match
0: 123123
1: 123
+/\X*/
+ \xF3aaa\xE4\xEA\xEB\xFEa
+ 0: \xf3aaa\xe4\xea\xeb\xfea
+
# End of testinput4
--
2.20.1

View File

@ -1,70 +0,0 @@
From 00acf0e2f3f01a3057fc099c60e4f530d744619b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Wed, 16 Oct 2019 17:12:13 +0000
Subject: [PATCH] Fix error offset bug introduced at 1176.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_match.c | 4 ++++
testdata/testinput10 | 6 ++++++
testdata/testoutput10 | 10 ++++++++++
3 files changed, 20 insertions(+)
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 9f34e20..cca4d3a 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6186,6 +6186,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
}
match_data->subject = NULL;
+/* Zero the error offset in case the first code unit is invalid UTF. */
+
+match_data->startchar = 0;
+
/* If the pattern was successfully studied with JIT support, run the JIT
executable instead of the rest of this function. Most options must be set at
compile time for the JIT code to be usable. Fallback to the normal code path if
diff --git a/testdata/testinput10 b/testdata/testinput10
index 4399f82..19d2f2f 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -493,4 +493,10 @@
/(?(á/utf
+/x/utf
+ abxyz
+ \x80\=startchar
+ abc\x80\=startchar
+ abc\x80\=startchar,offset=3
+
# End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index dfecda1..dd91c45 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1651,4 +1651,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat
/(?(á/utf
Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
+/x/utf
+ abxyz
+ 0: x
+ \x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
+ abc\x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
+ abc\x80\=startchar,offset=3
+Error -36 (bad UTF-8 offset)
+
# End of testinput10
--
2.21.0

View File

@ -1,134 +0,0 @@
From 4c3e518bff94e5f206a63e3a1e5d7e570402786b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 3 Aug 2019 08:30:40 +0000
Subject: [PATCH] Fix incorrect computation of group length when one branch
exceeded 65535.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1155 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_study.c | 18 ++++++++++++------
testdata/testinput2 | 8 ++++++++
testdata/testoutput2 | 27 +++++++++++++++++++++++++++
3 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
index e883c2e..cb5e7f1 100644
--- a/src/pcre2_study.c
+++ b/src/pcre2_study.c
@@ -103,6 +103,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
int *backref_cache)
{
int length = -1;
+int branchlength = 0;
int prev_cap_recno = -1;
int prev_cap_d = 0;
int prev_recurse_recno = -1;
@@ -110,9 +111,9 @@ int prev_recurse_d = 0;
uint32_t once_fudge = 0;
BOOL had_recurse = FALSE;
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
-recurse_check this_recurse;
-int branchlength = 0;
+PCRE2_SPTR nextbranch = code + GET(code, 1);
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
+recurse_check this_recurse;
/* If this is a "could be empty" group, its minimum length is 0. */
@@ -128,16 +129,20 @@ if ((*countptr)++ > 1000) return -1;
/* Scan along the opcodes for this branch. If we get to the end of the branch,
check the length against that of the other branches. If the accumulated length
-passes 16-bits, stop. */
+passes 16-bits, reset to that value and skip the rest of the branch. */
for (;;)
{
int d, min, recno;
- PCRE2_UCHAR *cs, *ce;
- PCRE2_UCHAR op = *cc;
+ PCRE2_UCHAR op, *cs, *ce;
- if (branchlength >= UINT16_MAX) return UINT16_MAX;
+ if (branchlength >= UINT16_MAX)
+ {
+ branchlength = UINT16_MAX;
+ cc = (PCRE2_UCHAR *)nextbranch;
+ }
+ op = *cc;
switch (op)
{
case OP_COND:
@@ -227,6 +232,7 @@ for (;;)
if (length < 0 || (!had_recurse && branchlength < length))
length = branchlength;
if (op != OP_ALT) return length;
+ nextbranch = cc + GET(cc, 1);
cc += 1 + LINK_SIZE;
branchlength = 0;
had_recurse = FALSE;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 1bfe591..384239a 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5603,4 +5603,12 @@ a)"xI
# Expect error (recursion => not fixed length)
/(\2)((?=(?<=\1)))/
+/\A\s*(a|(?:[^`]{28500}){4})/I
+ a
+
+/\A\s*((?:[^`]{28500}){4})/I
+
+/\A\s*((?:[^`]{28500}){4}|a)/I
+ a
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 758b4db..0983741 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16956,6 +16956,33 @@ No match
/(\2)((?=(?<=\1)))/
Failed: error 125 at offset 8: lookbehind assertion is not fixed length
+/\A\s*(a|(?:[^`]{28500}){4})/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 1
+ a
+ 0: a
+ 1: a
+
+/\A\s*((?:[^`]{28500}){4})/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 65535
+
+/\A\s*((?:[^`]{28500}){4}|a)/I
+Capture group count = 1
+Max lookbehind = 1
+Compile options: <none>
+Overall options: anchored
+Subject length lower bound = 1
+ a
+ 0: a
+ 1: a
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,217 +0,0 @@
From 44c8382acfe0902b302e0d7a5b1c6d9ee9226a51 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 16 Jul 2019 15:06:21 +0000
Subject: [PATCH] Fix lookbehind within lookahead within lookbehind
misbehaviour bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 58 +++++++++++++++++++++++++++++---------------
testdata/testinput1 | 6 +++++
testdata/testinput2 | 3 +++
testdata/testoutput1 | 9 +++++++
testdata/testoutput2 | 4 +++
5 files changed, 61 insertions(+), 19 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index f6e0a0b..2ae95ed 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -135,6 +135,8 @@ static BOOL
set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *,
compile_block *);
+static int
+ check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
/*************************************************
@@ -8997,15 +8999,15 @@ for (;; pptr++)
}
break;
- /* Lookaheads can be ignored, but we must start the skip inside the group
- so that it isn't treated as a group within the branch. */
+ /* Lookaheads do not contribute to the length of this branch, but they may
+ contain lookbehinds within them whose lengths need to be set. */
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
- pptr = parsed_skip(pptr + 1, PSKIP_KET);
- if (pptr == NULL) goto PARSED_SKIP_FAILED;
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
+ if (*errcodeptr != 0) return -1;
- /* Also ignore any qualifiers that follow a lookahead assertion. */
+ /* Ignore any qualifiers that follow a lookahead assertion. */
switch (pptr[1])
{
@@ -9319,20 +9321,28 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
the error offset is marked unset. This enables the functions above not to
override settings from deeper nestings.
-Arguments cb points to the compile block
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
+This function is called recursively from get_branchlength() for lookaheads in
+order to process any lookbehinds that they may contain. It stops when it hits a
+non-nested closing parenthesis in this case, returning a pointer to it.
+
+Arguments
+ pptr points to where to start (start of pattern or start of lookahead)
+ retptr if not NULL, return the ket pointer here
+ cb points to the compile block
+
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/
static int
-check_lookbehinds(compile_block *cb)
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
{
-uint32_t *pptr;
int errorcode = 0;
int loopcount = 0;
+int nestlevel = 0;
cb->erroroffset = PCRE2_UNSET;
-for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
+for (; *pptr != META_END; pptr++)
{
if (*pptr < META_END) continue; /* Literal */
@@ -9346,14 +9356,30 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
pptr += 1;
break;
+ case META_KET:
+ if (--nestlevel < 0)
+ {
+ if (retptr != NULL) *retptr = pptr;
+ return 0;
+ }
+ break;
+
+ case META_ATOMIC:
+ case META_CAPTURE:
+ case META_COND_ASSERT:
+ case META_LOOKAHEAD:
+ case META_LOOKAHEADNOT:
+ case META_NOCAPTURE:
+ case META_SCRIPT_RUN:
+ nestlevel++;
+ break;
+
case META_ACCEPT:
case META_ALT:
case META_ASTERISK:
case META_ASTERISK_PLUS:
case META_ASTERISK_QUERY:
- case META_ATOMIC:
case META_BACKREF:
- case META_CAPTURE:
case META_CIRCUMFLEX:
case META_CLASS:
case META_CLASS_EMPTY:
@@ -9361,14 +9387,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_CLASS_END:
case META_CLASS_NOT:
case META_COMMIT:
- case META_COND_ASSERT:
case META_DOLLAR:
case META_DOT:
case META_FAIL:
- case META_KET:
- case META_LOOKAHEAD:
- case META_LOOKAHEADNOT:
- case META_NOCAPTURE:
case META_PLUS:
case META_PLUS_PLUS:
case META_PLUS_QUERY:
@@ -9378,7 +9399,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_QUERY_QUERY:
case META_RANGE_ESCAPED:
case META_RANGE_LITERAL:
- case META_SCRIPT_RUN:
case META_SKIP:
case META_THEN:
break;
@@ -9879,7 +9899,7 @@ lengths. */
if (has_lookbehind)
{
- errorcode = check_lookbehinds(&cb);
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
if (errorcode != 0) goto HAD_CB_ERROR;
}
diff --git a/testdata/testinput1 b/testdata/testinput1
index 4d9ec5a..ee9354b 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6365,4 +6365,10 @@ ef) x/x,mark
/(?(DEFINE)(a|ab))(?1){1}+c/
abc
+/(?<=(?=.(?<=x)))/aftertext
+ abx
+
+/(?<=(?=(?<=a)))b/
+ ab
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9412bf6..d85fc5f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5597,4 +5597,7 @@ a)"xI
# Multiplication overflow
/(X{65535})(?<=\1{32770})/
+/(?<=(?=.(?<=x)))/
+ ab\=ph
+
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index fffb8ec..c9bfea8 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10081,4 +10081,13 @@ No match
abc
No match
+/(?<=(?=.(?<=x)))/aftertext
+ abx
+ 0:
+ 0+ x
+
+/(?<=(?=(?<=a)))b/
+ ab
+ 0: b
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 950095f..6405e26 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16948,6 +16948,10 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
/(X{65535})(?<=\1{32770})/
Failed: error 187 at offset 10: lookbehind assertion is too long
+/(?<=(?=.(?<=x)))/
+ ab\=ph
+No match
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.20.1

View File

@ -1,130 +0,0 @@
From 427e9b2fffc46d6b49a31df34c8b120bffc2ea60 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Wed, 26 Jun 2019 16:13:28 +0000
Subject: [PATCH] Fix partial matching bug in pcre2_dfa_match().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1123 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
src/pcre2_dfa_match.c | 11 ++++-----
testdata/testinput6 | 22 +++++++++++++++++
testdata/testoutput6 | 36 ++++++++++++++++++++++++++++
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 911e9b9..538d15d 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -3152,8 +3152,8 @@ for (;;)
/* We have finished the processing at the current subject character. If no
new states have been set for the next character, we have found all the
- matches that we are going to find. If we are at the top level and partial
- matching has been requested, check for appropriate conditions.
+ matches that we are going to find. If partial matching has been requested,
+ check for appropriate conditions.
The "forced_fail" variable counts the number of (*F) encountered for the
character. If it is equal to the original active_count (saved in
@@ -3165,8 +3165,7 @@ for (;;)
if (new_count <= 0)
{
- if (rlevel == 1 && /* Top level, and */
- could_continue && /* Some could go on, and */
+ if (could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
@@ -3175,8 +3174,8 @@ for (;;)
match_count < 0) /* no matches */
) && /* And... */
(
- partial_newline || /* Either partial NL */
- ( /* or ... */
+ partial_newline || /* Either partial NL */
+ ( /* or ... */
ptr >= end_subject && /* End of subject and */
ptr > mb->start_used_ptr) /* Inspected non-empty string */
)
diff --git a/testdata/testinput6 b/testdata/testinput6
index 403e3fa..cc3ebd0 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -4972,4 +4972,26 @@
\= Expect no match
0
+/(?<=pqr)abc(?=xyz)/
+ 123pqrabcxy\=ps,allusedtext
+ 123pqrabcxyz\=ps,allusedtext
+
+/(?>a+b)/
+ aaaa\=ps
+ aaaab\=ps
+
+/(abc)(?1)/
+ abca\=ps
+ abcabc\=ps
+
+/(?(?=abc).*|Z)/
+ ab\=ps
+ abcxyz\=ps
+
+/(abc)++x/
+ abcab\=ps
+ abc\=ps
+ ab\=ps
+ abcx
+
# End of testinput6
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 6a975dd..61cbfe2 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -7809,4 +7809,40 @@ No match
0
No match
+/(?<=pqr)abc(?=xyz)/
+ 123pqrabcxy\=ps,allusedtext
+Partial match: pqrabcxy
+ <<<
+ 123pqrabcxyz\=ps,allusedtext
+ 0: pqrabcxyz
+ <<< >>>
+
+/(?>a+b)/
+ aaaa\=ps
+Partial match: aaaa
+ aaaab\=ps
+ 0: aaaab
+
+/(abc)(?1)/
+ abca\=ps
+Partial match: abca
+ abcabc\=ps
+ 0: abcabc
+
+/(?(?=abc).*|Z)/
+ ab\=ps
+Partial match: ab
+ abcxyz\=ps
+ 0: abcxyz
+
+/(abc)++x/
+ abcab\=ps
+Partial match: abcab
+ abc\=ps
+Partial match: abc
+ ab\=ps
+Partial match: ab
+ abcx
+ 0: abcx
+
# End of testinput6
--
2.20.1

View File

@ -1,382 +0,0 @@
From e29388de53ea3a4f9d1c6b4932613681493ac9dc Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 15 Jun 2019 15:51:07 +0000
Subject: [PATCH] Fix pcre2grep -o bug when ovector overflows; add option to
adjust the limit; raise the default limit; give error if -o requests an
uncaptured parens.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1106 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
RunGrepTest | 7 ++++++
doc/html/pcre2api.html | 12 +++++-----
doc/html/pcre2grep.html | 28 +++++++++++++++-------
doc/html/pcre2test.html | 4 +++-
doc/pcre2grep.1 | 26 +++++++++++++-------
doc/pcre2grep.txt | 43 ++++++++++++++++++++-------------
doc/pcre2test.txt | 4 +++-
src/pcre2grep.c | 53 ++++++++++++++++++++++++++++-------------
testdata/grepoutput | 7 ++++++
9 files changed, 126 insertions(+), 58 deletions(-)
diff --git a/RunGrepTest b/RunGrepTest
index bac1f1b..ea37f70 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -653,6 +653,13 @@ printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 127 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 128 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
# Now compare the results.
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index 7ca39f5..84f4442 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -2252,12 +2252,12 @@ segment.
PCRE2_INFO_MINLENGTH
</pre>
If a minimum length for matching subject strings was computed, its value is
-returned. Otherwise the returned value is 0. The value is a number of
-characters, which in UTF mode may be different from the number of code units.
-The third argument should point to an <b>uint32_t</b> variable. The value is a
-lower bound to the length of any matching string. There may not be any strings
-of that length that do actually match, but every string that does match is at
-least that long.
+returned. Otherwise the returned value is 0. This value is not computed when
+PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in
+UTF mode may be different from the number of code units. The third argument
+should point to an <b>uint32_t</b> variable. The value is a lower bound to the
+length of any matching string. There may not be any strings of that length that
+do actually match, but every string that does match is at least that long.
<pre>
PCRE2_INFO_NAMECOUNT
PCRE2_INFO_NAMEENTRYSIZE
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
index d66cee3..de699e7 100644
--- a/doc/html/pcre2grep.html
+++ b/doc/html/pcre2grep.html
@@ -685,20 +685,32 @@ otherwise empty line. This option is mutually exclusive with <b>--output</b>,
<P>
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
Show only the part of the line that matched the capturing parentheses of the
-given number. Up to 32 capturing parentheses are supported, and -o0 is
-equivalent to <b>-o</b> without a number. Because these options can be given
-without an argument (see above), if an argument is present, it must be given in
-the same shell item, for example, -o3 or --only-matching=2. The comments given
-for the non-argument case above also apply to this option. If the specified
-capturing parentheses do not exist in the pattern, or were not set in the
-match, nothing is output unless the file name or line number are being output.
+given number. Up to 50 capturing parentheses are supported by default. This
+limit can be changed via the <b>--om-capture</b> option. A pattern may contain
+any number of capturing parentheses, but only those whose number is within the
+limit can be accessed by <b>-o</b>. An error occurs if the number specified by
+<b>-o</b> is greater than the limit.
+<br>
+<br>
+-o0 is the same as <b>-o</b> without a number. Because these options can be
+given without an argument (see above), if an argument is present, it must be
+given in the same shell item, for example, -o3 or --only-matching=2. The
+comments given for the non-argument case above also apply to this option. If
+the specified capturing parentheses do not exist in the pattern, or were not
+set in the match, nothing is output unless the file name or line number are
+being output.
<br>
<br>
If this option is given multiple times, multiple substrings are output for each
match, in the order the options are given, and all on one line. For example,
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
then 3 again to be output. By default, there is no separator (but see the next
-option).
+but one option).
+</P>
+<P>
+<b>--om-capture</b>=<i>number</i>
+Set the number of capturing parentheses that can be accessed by <b>-o</b>. The
+default is 50.
</P>
<P>
<b>--om-separator</b>=<i>text</i>
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
index 083d5cc..4be47c6 100644
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@@ -738,7 +738,9 @@ options, the line is omitted. "First code unit" is where any match must start;
if there is more than one they are listed as "starting code units". "Last code
unit" is the last literal code unit that must be present in any match. This is
not necessarily the last character. These lines are omitted if no starting or
-ending code units are recorded.
+ending code units are recorded. The subject length line is omitted when
+<b>no_start_optimize</b> is set because the minimum length is not calculated
+when it can never be used.
</P>
<P>
The <b>framesize</b> modifier shows the size, in bytes, of the storage frames
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
index 6b3219b..1dcdb68 100644
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@@ -596,19 +596,29 @@ otherwise empty line. This option is mutually exclusive with \fB--output\fP,
.TP
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
Show only the part of the line that matched the capturing parentheses of the
-given number. Up to 32 capturing parentheses are supported, and -o0 is
-equivalent to \fB-o\fP without a number. Because these options can be given
-without an argument (see above), if an argument is present, it must be given in
-the same shell item, for example, -o3 or --only-matching=2. The comments given
-for the non-argument case above also apply to this option. If the specified
-capturing parentheses do not exist in the pattern, or were not set in the
-match, nothing is output unless the file name or line number are being output.
+given number. Up to 50 capturing parentheses are supported by default. This
+limit can be changed via the \fB--om-capture\fP option. A pattern may contain
+any number of capturing parentheses, but only those whose number is within the
+limit can be accessed by \fB-o\fP. An error occurs if the number specified by
+\fB-o\fP is greater than the limit.
+.sp
+-o0 is the same as \fB-o\fP without a number. Because these options can be
+given without an argument (see above), if an argument is present, it must be
+given in the same shell item, for example, -o3 or --only-matching=2. The
+comments given for the non-argument case above also apply to this option. If
+the specified capturing parentheses do not exist in the pattern, or were not
+set in the match, nothing is output unless the file name or line number are
+being output.
.sp
If this option is given multiple times, multiple substrings are output for each
match, in the order the options are given, and all on one line. For example,
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
then 3 again to be output. By default, there is no separator (but see the next
-option).
+but one option).
+.TP
+\fB--om-capture\fP=\fInumber\fP
+Set the number of capturing parentheses that can be accessed by \fB-o\fP. The
+default is 50.
.TP
\fB--om-separator\fP=\fItext\fP
Specify a separating string for multiple occurrences of \fB-o\fP. The default
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
index cd44fe0..2920643 100644
--- a/doc/pcre2grep.txt
+++ b/doc/pcre2grep.txt
@@ -662,23 +662,32 @@ OPTIONS
-onumber, --only-matching=number
Show only the part of the line that matched the capturing
- parentheses of the given number. Up to 32 capturing parenthe-
- ses are supported, and -o0 is equivalent to -o without a num-
- ber. Because these options can be given without an argument
- (see above), if an argument is present, it must be given in
- the same shell item, for example, -o3 or --only-matching=2.
- The comments given for the non-argument case above also apply
- to this option. If the specified capturing parentheses do not
- exist in the pattern, or were not set in the match, nothing
- is output unless the file name or line number are being out-
- put.
-
- If this option is given multiple times, multiple substrings
- are output for each match, in the order the options are
- given, and all on one line. For example, -o3 -o1 -o3 causes
- the substrings matched by capturing parentheses 3 and 1 and
- then 3 again to be output. By default, there is no separator
- (but see the next option).
+ parentheses of the given number. Up to 50 capturing parenthe-
+ ses are supported by default. This limit can be changed via
+ the --om-capture option. A pattern may contain any number of
+ capturing parentheses, but only those whose number is within
+ the limit can be accessed by -o. An error occurs if the num-
+ ber specified by -o is greater than the limit.
+
+ -o0 is the same as -o without a number. Because these options
+ can be given without an argument (see above), if an argument
+ is present, it must be given in the same shell item, for
+ example, -o3 or --only-matching=2. The comments given for the
+ non-argument case above also apply to this option. If the
+ specified capturing parentheses do not exist in the pattern,
+ or were not set in the match, nothing is output unless the
+ file name or line number are being output.
+
+ If this option is given multiple times, multiple substrings
+ are output for each match, in the order the options are
+ given, and all on one line. For example, -o3 -o1 -o3 causes
+ the substrings matched by capturing parentheses 3 and 1 and
+ then 3 again to be output. By default, there is no separator
+ (but see the next but one option).
+
+ --om-capture=number
+ Set the number of capturing parentheses that can be accessed
+ by -o. The default is 50.
--om-separator=text
Specify a separating string for multiple occurrences of -o.
diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
index cbe3528..f287f6d 100644
--- a/doc/pcre2test.txt
+++ b/doc/pcre2test.txt
@@ -669,7 +669,9 @@ PATTERN MODIFIERS
as "starting code units". "Last code unit" is the last literal code
unit that must be present in any match. This is not necessarily the
last character. These lines are omitted if no starting or ending code
- units are recorded.
+ units are recorded. The subject length line is omitted when
+ no_start_optimize is set because the minimum length is not calculated
+ when it can never be used.
The framesize modifier shows the size, in bytes, of the storage frames
used by pcre2_match() for handling backtracking. The size depends on
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index a3cc3ec..d17cd2a 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -115,7 +115,7 @@ MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
typedef int BOOL;
-#define OFFSET_SIZE 33
+#define DEFAULT_CAPTURE_MAX 50
#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
@@ -242,6 +242,8 @@ static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context;
static pcre2_match_data *match_data;
static PCRE2_SIZE *offsets;
+static uint32_t offset_size;
+static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
@@ -391,6 +393,7 @@ used to identify them. */
#define N_INCLUDE_FROM (-21)
#define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE (-23)
+#define N_OM_CAPTURE (-24)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
@@ -437,6 +440,7 @@ static option_item optionlist[] = {
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
+ { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
{ OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
@@ -2568,7 +2572,7 @@ while (ptr < endptr)
for (i = 0; i < jfriedl_XR; i++)
match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
- PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
+ PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
if (gettimeofday(&end_time, &dummy) != 0)
perror("bad gettimeofday");
@@ -2688,7 +2692,7 @@ while (ptr < endptr)
for (om = only_matching; om != NULL; om = om->next)
{
int n = om->groupnum;
- if (n < mrc)
+ if (n == 0 || n < mrc)
{
int plen = offsets[2*n + 1] - offsets[2*n];
if (plen > 0)
@@ -3639,6 +3643,7 @@ int rc = 1;
BOOL only_one_at_top;
patstr *cp;
fnstr *fn;
+omstr *om;
const char *locale_from = "--locale";
#ifdef SUPPORT_PCRE2GREP_JIT
@@ -3655,20 +3660,6 @@ must use STDOUT_NL to terminate lines. */
_setmode(_fileno(stdout), _O_BINARY);
#endif
-/* Set up a default compile and match contexts and a match data block. */
-
-compile_context = pcre2_compile_context_create(NULL);
-match_context = pcre2_match_context_create(NULL);
-match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
-offsets = pcre2_get_ovector_pointer(match_data);
-
-/* If string (script) callouts are supported, set up the callout processing
-function. */
-
-#ifdef SUPPORT_PCRE2GREP_CALLOUT
-pcre2_set_callout(match_context, pcre2grep_callout, NULL);
-#endif
-
/* Process the options */
for (i = 1; i < argc; i++)
@@ -4015,12 +4006,40 @@ if (only_matching_count > 1)
pcre2grep_exit(usage(2));
}
+/* Check that there is a big enough ovector for all -o settings. */
+
+for (om = only_matching; om != NULL; om = om->next)
+ {
+ int n = om->groupnum;
+ if (n > (int)capture_max)
+ {
+ fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
+ fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
+ goto EXIT2;
+ }
+ }
+
/* Check the text supplied to --output for errors. */
if (output_text != NULL &&
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
goto EXIT2;
+/* Set up default compile and match contexts and a match data block. */
+
+offset_size = capture_max + 1;
+compile_context = pcre2_compile_context_create(NULL);
+match_context = pcre2_match_context_create(NULL);
+match_data = pcre2_match_data_create(offset_size, NULL);
+offsets = pcre2_get_ovector_pointer(match_data);
+
+/* If string (script) callouts are supported, set up the callout processing
+function. */
+
+#ifdef SUPPORT_PCRE2GREP_CALLOUT
+pcre2_set_callout(match_context, pcre2grep_callout, NULL);
+#endif
+
/* Put limits into the match data block. */
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 2bd69be..a9297e1 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -949,3 +949,10 @@ RC=0
---------------------------- Test 126 -----------------------------
ABCXYZ
RC=0
+---------------------------- Test 127 -----------------------------
+pattern
+RC=0
+---------------------------- Test 128 -----------------------------
+pcre2grep: Requested group 1 cannot be captured.
+pcre2grep: Use --om-capture to increase the size of the capture vector.
+RC=2
--
2.20.1

View File

@ -1,44 +0,0 @@
From 41d88497fd138f8daef7db674f21ca71e5f5822c Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 11 May 2019 11:43:39 +0000
Subject: [PATCH] Fix typo.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1090 6239d852-aaf2-0410-a92c-79f79f948069
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
doc/pcre2unicode.3 | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3
index 56eb1ea..e444422 100644
--- a/doc/pcre2unicode.3
+++ b/doc/pcre2unicode.3
@@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "06 March 2019" "PCRE2 10.33"
+.TH PCRE2UNICODE 3 "11 May 2019" "PCRE2 10.33"
.SH NAME
PCRE - Perl-compatible regular expressions (revised API)
.SH "UNICODE AND UTF SUPPORT"
@@ -330,7 +330,7 @@ these code points are excluded by RFC 3629.
.sp
PCRE2_ERROR_UTF8_ERR13
.sp
-A 4-byte character has a value greater than 0x10fff; these code points are
+A 4-byte character has a value greater than 0x10ffff; these code points are
excluded by RFC 3629.
.sp
PCRE2_ERROR_UTF8_ERR14
@@ -400,6 +400,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 06 March 2019
+Last updated: 11 May 2019
Copyright (c) 1997-2019 University of Cambridge.
.fi
--
2.20.1

View File

@ -1,68 +0,0 @@
From 2a182963f0b897ea898693771b885a5cead47826 Mon Sep 17 00:00:00 2001
From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sun, 6 Oct 2019 03:36:20 +0000
Subject: [PATCH] Fix use after free and compilation error in JIT.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1175 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_jit_compile.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index bd423a7..aea563d 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -13749,7 +13749,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
+ PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
@@ -13803,7 +13803,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
+ PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
@@ -13892,7 +13892,7 @@ while (common->currententry != NULL)
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
+ PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
flush_stubs(common);
@@ -14044,7 +14044,7 @@ while (label_addr != NULL)
sljit_free_compiler(compiler);
if (executable_func == NULL)
{
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
+ PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
@@ -14059,7 +14059,7 @@ else
/* This case is highly unlikely since we just recently
freed a lot of memory. Not impossible though. */
sljit_free_code(executable_func);
- PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);
+ PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
memset(functions, 0, sizeof(executable_functions));
--
2.21.0

View File

@ -1,34 +0,0 @@
From 9987891e7a0c885a66161cc1e62f94f314281fe7 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 13 May 2019 16:38:18 +0000
Subject: [PATCH 2/2] Forgot this file in previous commit. Fixes JIT non-UTF
bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1092 6239d852-aaf2-0410-a92c-79f79f948069
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_jit_compile.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index ae0fbcf..a19ce8b 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -8571,7 +8571,10 @@ int lgb, rgb, ricount;
PCRE2_SPTR bptr;
uint32_t c;
-GETCHARINC(c, cc);
+/* Patch by PH */
+/* GETCHARINC(c, cc); */
+
+c = *cc++;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
return NULL;
--
2.20.1

View File

@ -1,174 +0,0 @@
From a38f1e7eb827408133178ffac9987157d82edaa2 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 22 Apr 2019 12:39:38 +0000
Subject: [PATCH] Implement a check on the number of capturing parentheses,
which for some reason has never existed. This fixes ClusterFuzz issue 14376.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1088 6239d852-aaf2-0410-a92c-79f79f948069
Petr Pisar: Ported to 10.33.
src/pcre2.h.in | 1 +
src/pcre2_compile.c | 12 +++++++++++-
src/pcre2_error.c | 1 +
testdata/testinput11 | 2 ++
testdata/testinput2 | 4 ++++
testdata/testinput9 | 2 ++
testdata/testoutput11-16 | 3 +++
testdata/testoutput11-32 | 2 ++
testdata/testoutput2 | 6 ++++++
testdata/testoutput9 | 3 +++
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 9415d70..29f3688 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -305,6 +305,7 @@ pcre2_pattern_convert(). */
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
+#define PCRE2_ERROR_TOO_MANY_CAPTURES 197
/* "Expected" matching error codes: no match and partial match. */
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 068735a..cd6fbea 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -781,7 +781,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
- ERR91, ERR92, ERR93, ERR94, ERR95, ERR96 };
+ ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -3611,6 +3611,11 @@ while (ptr < ptrend)
nest_depth++;
if ((options & PCRE2_NO_AUTO_CAPTURE) == 0)
{
+ if (cb->bracount >= MAX_GROUP_NUMBER)
+ {
+ errorcode = ERR97;
+ goto FAILED;
+ }
cb->bracount++;
*parsed_pattern++ = META_CAPTURE | cb->bracount;
}
@@ -4435,6 +4440,11 @@ while (ptr < ptrend)
/* We have a name for this capturing group. It is also assigned a number,
which is its primary means of identification. */
+ if (cb->bracount >= MAX_GROUP_NUMBER)
+ {
+ errorcode = ERR97;
+ goto FAILED;
+ }
cb->bracount++;
*parsed_pattern++ = META_CAPTURE | cb->bracount;
nest_depth++;
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index 1d02cf1..5517e74 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -184,6 +184,7 @@ static const unsigned char compile_error_texts[] =
/* 95 */
"(*alpha_assertion) not recognized\0"
"script runs require Unicode support, which this version of PCRE2 does not have\0"
+ "too many capturing groups (maximum 65535)\0"
;
/* Match-time and UTF error texts are in the same format. */
diff --git a/testdata/testinput11 b/testdata/testinput11
index 2d267d6..fca6042 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -368,4 +368,6 @@
abÿAz
ab\x{80000041}z
+/\[()]{65535}/expand
+
# End of testinput11
diff --git a/testdata/testinput2 b/testdata/testinput2
index 9e59b62..8a98f94 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5587,4 +5587,8 @@ a)"xI
\= Expect error message
abc\=null_context
+/\[()]{65535}()/expand
+
+/\[()]{65535}(?<A>)/expand
+
# End of testinput2
diff --git a/testdata/testinput9 b/testdata/testinput9
index 7be4b15..792d610 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -260,4 +260,6 @@
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/
+/\[()]{65535}/expand
+
# End of testinput9
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index 78bf7fb..f2b9637 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -661,4 +661,7 @@ Subject length lower bound = 1
abÿAz
ab\x{80000041}z
+/\[()]{65535}/expand
+Failed: error 120 at offset 131070: regular expression is too large
+
# End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 4b00384..1908ab7 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -667,4 +667,6 @@ Subject length lower bound = 1
ab\x{80000041}z
0: ab\x{80000041}z
+/\[()]{65535}/expand
+
# End of testinput11
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2f91c38..158fbad 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16934,6 +16934,12 @@ Subject length lower bound = 0
abc\=null_context
** Replacement callouts are not supported with null_context.
+/\[()]{65535}()/expand
+Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
+
+/\[()]{65535}(?<A>)/expand
+Failed: error 197 at offset 131075: too many capturing groups (maximum 65535)
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index f98f276..f66ca3d 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -367,4 +367,7 @@ Failed: error 134 at offset 14: character code point value in \x{} or \o{} is to
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':Æ¿)/
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
+/\[()]{65535}/expand
+Failed: error 120 at offset 131070: regular expression is too large
+
# End of testinput9
--
2.20.1

View File

@ -1,83 +0,0 @@
From 993928201df0c42258f32da6c882b791b1fab441 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Thu, 13 Jun 2019 16:49:40 +0000
Subject: [PATCH] Make pcre2_match() return (*MARK) names from successful
conditional assertions, as Perl and the JIT do.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1104 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.
---
src/pcre2_match.c | 7 ++++---
testdata/testinput1 | 8 ++++++++
testdata/testoutput1 | 14 ++++++++++++++
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 849bb58..c7730a2 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -5472,15 +5472,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* If we are at the end of an assertion that is a condition, return a
match, discarding any intermediate backtracking points. Copy back the
- captures into the frame before N so that they are set on return. Doing
- this for all assertions, both positive and negative, seems to match what
- Perl does. */
+ mark setting and the captures into the frame before N so that they are
+ set on return. Doing this for all assertions, both positive and negative,
+ seems to match what Perl does. */
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
{
memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
Foffset_top * sizeof(PCRE2_SIZE));
P->offset_top = Foffset_top;
+ P->mark = Fmark;
Fback_frame = (char *)F - (char *)P;
RRETURN(MATCH_MATCH);
}
diff --git a/testdata/testinput1 b/testdata/testinput1
index 3fd5d2a..7b6918a 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6343,4 +6343,12 @@ ef) x/x,mark
/(?&word)* \. (?<word> \w+ )/xi
pokus.hokus
+/a(?(?=(*:2)b).)/mark
+ abc
+ acb
+
+/a(?(?!(*:2)b).)/mark
+ acb
+ abc
+
# End of testinput1
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 8fd67b3..d9f8c3b 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -10049,4 +10049,18 @@ No match
0: pokus.hokus
1: hokus
+/a(?(?=(*:2)b).)/mark
+ abc
+ 0: ab
+MK: 2
+ acb
+ 0: a
+
+/a(?(?!(*:2)b).)/mark
+ acb
+ 0: ac
+ abc
+ 0: a
+MK: 2
+
# End of testinput1
--
2.20.1

View File

@ -6,10 +6,10 @@
%bcond_with pcre2_enables_sealloc
# This is stable release:
#%%global rcversion RC1
%global rcversion RC1
Name: pcre2
Version: 10.33
Release: %{?rcversion:0.}15%{?rcversion:.%rcversion}%{?dist}
Version: 10.34
Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
# the library: BSD with exceptions
@ -51,61 +51,6 @@ Source1: https://ftp.pcre.org/pub/pcre/%{?rcversion:Testing/}%{name}-%{myvers
Source2: https://ftp.pcre.org/pub/pcre/Public-Key
# Do not set RPATH if libdir is not /usr/lib
Patch0: pcre2-10.10-Fix-multilib.patch
# Validate number of capturing parentheses, in upstream after 10.33
Patch1: pcre2-10.33-Implement-a-check-on-the-number-of-capturing-parenth.patch
# Correct a misspelling in the documentation, in upstream after 10.33
Patch2: pcre2-10.33-Fix-typo.patch
# 1/2 Fix a crash when \X is used without UTF mode in a JIT, upstream bug #2399,
# in upstream after 10.33
Patch3: pcre2-10.33-Fix-crash-when-X-is-used-without-UTF-in-JIT.patch
# 2/2 Fix a crash when \X is used without UTF mode in a JIT, upstream bug #2399,
# in upstream after 10.33
Patch4: pcre2-10.33-Forgot-this-file-in-previous-commit.-Fixes-JIT-non-U.patch
# Fix a non-JIT match to return (*MARK) names from a successful conditional
# assertion, in upstream after 10.33
Patch5: pcre2-10.33-Make-pcre2_match-return-MARK-names-from-successful-c.patch
# Fix pcre2grep --only-matching output when number of capturing groups exceeds
# 32, upstream bug #2407, in upstream after 10.33
Patch6: pcre2-10.33-Fix-pcre2grep-o-bug-when-ovector-overflows-add-optio.patch
# Do not ignore {1} quantifier when it is applied to a non-possessive group
# with more alternatives, in upstream after 10.33
Patch7: pcre2-10.33-Don-t-ignore-1-when-it-is-applied-to-a-parenthesized.patch
# Fix a DFA to recognize a partial match if the end of a subject is encountered
# in a lookahead, an atomic group, or a recursion, in upstream after 10.33
Patch8: pcre2-10.33-Fix-partial-matching-bug-in-pcre2_dfa_match.patch
# 1/2 Fix an integer overflow when checking a lookbehind length,
# in upstream after 10.33
Patch9: pcre2-10.33-Check-for-integer-overflow-when-computing-lookbehind.patch
# 2/2 Fix an integer overflow when checking a lookbehind length,
# in upstream after 10.33
Patch10: pcre2-10.33-Additional-overflow-test.patch
# 1/2 Fix a mismatch with a lookbehind within a lookahead within a lookbehind,
# upstream bug #2412, in upstream after 10.33
Patch11: pcre2-10.33-Fix-lookbehind-within-lookahead-within-lookbehind-mi.patch
# 2/2 Fix a mismatch with a lookbehind within a lookahead within a lookbehind,
# upstream bug #2412, in upstream after 10.33
Patch12: pcre2-10.33-Fix-bug-in-recent-patch-for-lookbehinds-within-looka.patch
# Fix an incorrect computation of a group length when a branch exceeds 65535,
# upstream bug #2428, in upstream after 10.33
Patch13: pcre2-10.33-Fix-incorrect-computation-of-group-length-when-one-b.patch
# Fix reporting rightmost consulted characters, in upstream after 10.33
Patch14: pcre2-10.33-Fix-allusedtext-bug-rightmost-consulted-character-in.patch
# Fix a mismatch with a lookbehind after a condition, bug #1743863,
# upstream bug #2433, in upstream after 10.33, fixes a bug introduced in
# Fix-lookbehind-within-lookahead-within-lookbehind-mi.patch
Patch15: pcre2-10.33-Fix-bug-introduced-in-commit-1133.-Lookbehinds-that-.patch
# Fix a crash in JIT match when a subject has a zero length and an invalid
# pointer, upstream bug #2440, in upstream after 10.33
Patch16: pcre2-10.33-Add-underflow-check-in-JIT.patch
# Fix a use after free when freeing JIT memory, upstream bug #2453,
# in upstream after 10.33
Patch17: pcre2-10.33-Fix-use-after-free-and-compilation-error-in-JIT.patch
# 1/2 Fix thread-safeness in regexec(), upstream bug #2447,
# in upstream after 10.33
Patch18: pcre2-10.33-Ensure-regexec-is-thread-safe-to-avoid-sanitizer-war.patch
# 2/2 Fix thread-safeness in regexec(), upstream bug #2447,
# in upstream after 10.33
Patch19: pcre2-10.33-Fix-error-offset-bug-introduced-at-1176.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: coreutils
@ -184,25 +129,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}'
%setup -q -n %{name}-%{myversion}
%patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
%patch15 -p1
%patch16 -p1
%patch17 -p1
%patch18 -p1
%patch19 -p1
# Because of multilib patch
libtoolize --copy --force
autoreconf -vif
@ -291,7 +217,6 @@ make %{?_smp_mflags} check VERBOSE=yes
%files static
%{_libdir}/*.a
%{!?_licensedir:%global license %%doc}
%license COPYING LICENCE
%files tools
@ -301,6 +226,9 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.*
%changelog
* Wed Oct 30 2019 Petr Pisar <ppisar@redhat.com> - 10.34-0.1.RC1
- 10.34-RC1 bump
* Tue Oct 29 2019 Petr Pisar <ppisar@redhat.com> - 10.33-15
- Fix a use after free when freeing JIT memory (upstream bug #2453)
- Fix thread-safeness in regexec() (upstream bug #2447)

View File

@ -1,2 +1,2 @@
SHA512 (pcre2-10.33.tar.bz2) = c6aa52cdbb0c906b02e9a9f255697a1b9b99d9c7dccdaa484ffdb04fe3582d232ba3cb3c421d6df589a387b415dedb0817f58ec5391488b25b4e7bd9da6bab8f
SHA512 (pcre2-10.33.tar.bz2.sig) = e5bf3e7d6df4f5f9d2975b1cf97d47d82863637e9caeeb5677c81eba488063f4d9a161acf5738e104c3f71f2cef10aa9639601080f0fd60ec41dfbbc3c99e5dd
SHA512 (pcre2-10.34-RC1.tar.bz2) = 9421ff823e13cdc9598819c3d4777a648f8d3ce3d13eaa9c5e64df676adf9c77fbaf439784138d1e265edaabe71ab863d1fc84e1fac7a9cfa3a4f7b867b6a47a
SHA512 (pcre2-10.34-RC1.tar.bz2.sig) = 04f6c1b20b378ba831f73a6a19439fef3b763144a3fa4864bfdd29b6380a6ab676006044fe19195feba59a4047420e2ce3dce354211f441a4d0abd2c50ee100a