10.34-RC1 bump
This commit is contained in:
parent
96360c9835
commit
db0a3cc7dd
2
.gitignore
vendored
2
.gitignore
vendored
@@ -16,3 +16,5 @@
|
||||
/pcre2-10.33-RC1.tar.bz2
|
||||
/pcre2-10.33.tar.bz2
|
||||
/pcre2-10.33.tar.bz2.sig
|
||||
/pcre2-10.34-RC1.tar.bz2
|
||||
/pcre2-10.34-RC1.tar.bz2.sig
|
||||
|
@@ -1,39 +0,0 @@
|
||||
From bc7fb8964ca3a422f472189b0eff751c1cc377b0 Mon Sep 17 00:00:00 2001
|
||||
From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Mon, 9 Sep 2019 07:12:00 +0000
|
||||
Subject: [PATCH] Add underflow check in JIT.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1167 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
---
|
||||
src/pcre2_jit_compile.c | 8 ++++++--
|
||||
|
||||
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
||||
index 8cbd8f9..79a27fd 100644
|
||||
--- a/src/pcre2_jit_compile.c
|
||||
+++ b/src/pcre2_jit_compile.c
|
||||
@@ -5793,12 +5793,16 @@ if (common->match_end_ptr != 0)
|
||||
{
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
||||
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
|
||||
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
|
||||
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
|
||||
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
|
||||
CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
|
||||
}
|
||||
else
|
||||
- OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
|
||||
+ {
|
||||
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
|
||||
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
|
||||
+ }
|
||||
|
||||
SLJIT_ASSERT(range_right >= 0);
|
||||
|
||||
--
|
||||
2.21.0
|
||||
|
@@ -1,54 +0,0 @@
|
||||
From bcf39c1828399ebc33fb92c4edaf2bdd5f891a58 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Fri, 5 Jul 2019 15:49:37 +0000
|
||||
Subject: [PATCH] Additional overflow test.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1127 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
testdata/testinput2 | 4 ++++
|
||||
testdata/testoutput2 | 5 +++++
|
||||
2 files changed, 9 insertions(+)
|
||||
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 079d6d8..9412bf6 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5591,6 +5591,10 @@ a)"xI
|
||||
|
||||
/\[()]{65535}(?<A>)/expand
|
||||
|
||||
+# Addition overflow
|
||||
/( {32742} {42})(?<!\1{65481})/
|
||||
|
||||
+# Multiplication overflow
|
||||
+/(X{65535})(?<=\1{32770})/
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index bfe61a3..950095f 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16940,9 +16940,14 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
|
||||
/\[()]{65535}(?<A>)/expand
|
||||
Failed: error 197 at offset 131075: too many capturing groups (maximum 65535)
|
||||
|
||||
+# Addition overflow
|
||||
/( {32742} {42})(?<!\1{65481})/
|
||||
Failed: error 187 at offset 15: lookbehind assertion is too long
|
||||
|
||||
+# Multiplication overflow
|
||||
+/(X{65535})(?<=\1{32770})/
|
||||
+Failed: error 187 at offset 10: lookbehind assertion is too long
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.20.1
|
||||
|
@@ -1,108 +0,0 @@
|
||||
From cdefe642dc2e6b5b8e6703773934813f317bc488 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Thu, 4 Jul 2019 17:01:53 +0000
|
||||
Subject: [PATCH] Check for integer overflow when computing lookbehind lengths.
|
||||
Fixes Clusterfuzz issue 13656.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1126 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_compile.c | 38 ++++++++++++++++++++++++++++----------
|
||||
testdata/testinput2 | 2 ++
|
||||
testdata/testoutput2 | 3 +++
|
||||
3 files changed, 33 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index c82c6ca..f6e0a0b 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -9197,8 +9197,26 @@ for (;; pptr++)
|
||||
case META_MINMAX_QUERY:
|
||||
if (pptr[1] == pptr[2])
|
||||
{
|
||||
- if (pptr[1] == 0) branchlength -= lastitemlength;
|
||||
- else itemlength = (pptr[1] - 1) * lastitemlength;
|
||||
+ switch(pptr[1])
|
||||
+ {
|
||||
+ case 0:
|
||||
+ branchlength -= lastitemlength;
|
||||
+ break;
|
||||
+
|
||||
+ case 1:
|
||||
+ itemlength = 0;
|
||||
+ break;
|
||||
+
|
||||
+ default: /* Check for integer overflow */
|
||||
+ if (lastitemlength != 0 && /* Should not occur, but just in case */
|
||||
+ INT_MAX/lastitemlength < pptr[1] - 1)
|
||||
+ {
|
||||
+ *errcodeptr = ERR87; /* Integer overflow; lookbehind too big */
|
||||
+ return -1;
|
||||
+ }
|
||||
+ itemlength = (pptr[1] - 1) * lastitemlength;
|
||||
+ break;
|
||||
+ }
|
||||
pptr += 2;
|
||||
break;
|
||||
}
|
||||
@@ -9212,19 +9230,19 @@ for (;; pptr++)
|
||||
return -1;
|
||||
}
|
||||
|
||||
- /* Add the item length to the branchlength, and save it for use if the next
|
||||
- thing is a quantifier. */
|
||||
-
|
||||
- branchlength += itemlength;
|
||||
- lastitemlength = itemlength;
|
||||
-
|
||||
- /* Ensure that the length does not overflow the limit. */
|
||||
+ /* Add the item length to the branchlength, checking for integer overflow and
|
||||
+ for the branch length exceeding the limit. */
|
||||
|
||||
- if (branchlength > LOOKBEHIND_MAX)
|
||||
+ if (INT_MAX - branchlength < (int)itemlength ||
|
||||
+ (branchlength += itemlength) > LOOKBEHIND_MAX)
|
||||
{
|
||||
*errcodeptr = ERR87;
|
||||
return -1;
|
||||
}
|
||||
+
|
||||
+ /* Save this item length for use if the next item is a quantifier. */
|
||||
+
|
||||
+ lastitemlength = itemlength;
|
||||
}
|
||||
|
||||
EXIT:
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 8a98f94..079d6d8 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5591,4 +5591,6 @@ a)"xI
|
||||
|
||||
/\[()]{65535}(?<A>)/expand
|
||||
|
||||
+/( {32742} {42})(?<!\1{65481})/
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 158fbad..bfe61a3 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16940,6 +16940,9 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
|
||||
/\[()]{65535}(?<A>)/expand
|
||||
Failed: error 197 at offset 131075: too many capturing groups (maximum 65535)
|
||||
|
||||
+/( {32742} {42})(?<!\1{65481})/
|
||||
+Failed: error 187 at offset 15: lookbehind assertion is too long
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.20.1
|
||||
|
@@ -1,160 +0,0 @@
|
||||
From 76d59bdbc2d30bad1d11e0490b767058dc33d39c Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Wed, 19 Jun 2019 16:27:50 +0000
|
||||
Subject: [PATCH] Don't ignore {1}+ when it is applied to a parenthesized item.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1111 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_compile.c | 29 +++++++++++++++++++----------
|
||||
testdata/testinput1 | 14 ++++++++++++++
|
||||
testdata/testoutput1 | 18 ++++++++++++++++++
|
||||
3 files changed, 51 insertions(+), 10 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index cd6fbea..c82c6ca 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -6723,10 +6723,6 @@ for (;; pptr++)
|
||||
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
|
||||
op_type = 0;
|
||||
|
||||
- /* If the repeat is {1} we can ignore it. */
|
||||
-
|
||||
- if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||
-
|
||||
/* Adjust first and required code units for a zero repeat. */
|
||||
|
||||
if (repeat_min == 0)
|
||||
@@ -6769,7 +6765,10 @@ for (;; pptr++)
|
||||
tempcode = previous;
|
||||
op_previous = *previous;
|
||||
|
||||
- /* Now handle repetition for the different types of item. */
|
||||
+ /* Now handle repetition for the different types of item. If the repeat
|
||||
+ minimum and the repeat maximum are both 1, we can ignore the quantifier for
|
||||
+ non-parenthesized items, as they have only one alternative. For anything in
|
||||
+ parentheses, we must not ignore if {1} is possessive. */
|
||||
|
||||
switch (op_previous)
|
||||
{
|
||||
@@ -6783,6 +6782,7 @@ for (;; pptr++)
|
||||
case OP_CHARI:
|
||||
case OP_NOT:
|
||||
case OP_NOTI:
|
||||
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||
op_type = chartypeoffset[op_previous - OP_CHAR];
|
||||
|
||||
/* Deal with UTF characters that take up more than one code unit. */
|
||||
@@ -6829,6 +6829,7 @@ for (;; pptr++)
|
||||
code = previous;
|
||||
goto END_REPEAT;
|
||||
}
|
||||
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||
|
||||
if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
|
||||
*code++ = OP_CRSTAR + repeat_type;
|
||||
@@ -6863,6 +6864,8 @@ for (;; pptr++)
|
||||
repetition. */
|
||||
|
||||
case OP_RECURSE:
|
||||
+ if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
|
||||
+ goto END_REPEAT;
|
||||
|
||||
/* Generate unwrapped repeats for a non-zero minimum, except when the
|
||||
minimum is 1 and the maximum unlimited, because that can be handled with
|
||||
@@ -6945,6 +6948,9 @@ for (;; pptr++)
|
||||
PCRE2_UCHAR *bralink = NULL;
|
||||
PCRE2_UCHAR *brazeroptr = NULL;
|
||||
|
||||
+ if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
|
||||
+ goto END_REPEAT;
|
||||
+
|
||||
/* Repeating a DEFINE group (or any group where the condition is always
|
||||
FALSE and there is only one branch) is pointless, but Perl allows the
|
||||
syntax, so we just ignore the repeat. */
|
||||
@@ -7161,11 +7167,12 @@ for (;; pptr++)
|
||||
and SCRIPT_RUN groups at runtime, but in a different way.]
|
||||
|
||||
Then, if the quantifier was possessive and the bracket is not a
|
||||
- conditional, we convert the BRA code to the POS form, and the KET code to
|
||||
- KETRPOS. (It turns out to be convenient at runtime to detect this kind of
|
||||
- subpattern at both the start and at the end.) The use of special opcodes
|
||||
- makes it possible to reduce greatly the stack usage in pcre2_match(). If
|
||||
- the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
|
||||
+ conditional, we convert the BRA code to the POS form, and the KET code
|
||||
+ to KETRPOS. (It turns out to be convenient at runtime to detect this
|
||||
+ kind of subpattern at both the start and at the end.) The use of
|
||||
+ special opcodes makes it possible to reduce greatly the stack usage in
|
||||
+ pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to
|
||||
+ OP_BRAPOSZERO.
|
||||
|
||||
Then, if the minimum number of matches is 1 or 0, cancel the possessive
|
||||
flag so that the default action below, of wrapping everything inside
|
||||
@@ -7266,6 +7273,8 @@ for (;; pptr++)
|
||||
int prop_type, prop_value;
|
||||
PCRE2_UCHAR *oldcode;
|
||||
|
||||
+ if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||
+
|
||||
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
|
||||
mclength = 0; /* Not a character */
|
||||
|
||||
diff --git a/testdata/testinput1 b/testdata/testinput1
|
||||
index 7b6918a..4d9ec5a 100644
|
||||
--- a/testdata/testinput1
|
||||
+++ b/testdata/testinput1
|
||||
@@ -6351,4 +6351,18 @@ ef) x/x,mark
|
||||
acb
|
||||
abc
|
||||
|
||||
+/(?:a|ab){1}+c/
|
||||
+\= Expect no match
|
||||
+ abc
|
||||
+
|
||||
+/(a|ab){1}+c/
|
||||
+ abc
|
||||
+
|
||||
+/(a+){1}+a/
|
||||
+\= Expect no match
|
||||
+ aaaa
|
||||
+
|
||||
+/(?(DEFINE)(a|ab))(?1){1}+c/
|
||||
+ abc
|
||||
+
|
||||
# End of testinput1
|
||||
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
||||
index d9f8c3b..fffb8ec 100644
|
||||
--- a/testdata/testoutput1
|
||||
+++ b/testdata/testoutput1
|
||||
@@ -10063,4 +10063,22 @@ MK: 2
|
||||
0: a
|
||||
MK: 2
|
||||
|
||||
+/(?:a|ab){1}+c/
|
||||
+\= Expect no match
|
||||
+ abc
|
||||
+No match
|
||||
+
|
||||
+/(a|ab){1}+c/
|
||||
+ abc
|
||||
+No match
|
||||
+
|
||||
+/(a+){1}+a/
|
||||
+\= Expect no match
|
||||
+ aaaa
|
||||
+No match
|
||||
+
|
||||
+/(?(DEFINE)(a|ab))(?1){1}+c/
|
||||
+ abc
|
||||
+No match
|
||||
+
|
||||
# End of testinput1
|
||||
--
|
||||
2.20.1
|
||||
|
@@ -1,37 +0,0 @@
|
||||
From 9835bbc7fbb3423163dc49e7d822dad2b135e192 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Tue, 15 Oct 2019 10:46:36 +0000
|
||||
Subject: [PATCH] Ensure regexec is thread safe to avoid sanitizer warnings.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1176 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
---
|
||||
src/pcre2posix.c | 3 +--
|
||||
|
||||
diff --git a/src/pcre2posix.c b/src/pcre2posix.c
|
||||
index 34a8d80..b24620a 100644
|
||||
--- a/src/pcre2posix.c
|
||||
+++ b/src/pcre2posix.c
|
||||
@@ -323,6 +323,7 @@ if (preg->re_pcre2_code == NULL)
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL);
|
||||
+preg->re_erroffset = (size_t)(-1); /* No meaning after successful compile */
|
||||
|
||||
if (preg->re_match_data == NULL)
|
||||
{
|
||||
@@ -356,8 +357,6 @@ if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
|
||||
if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
|
||||
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY;
|
||||
|
||||
-((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
|
||||
-
|
||||
/* When REG_NOSUB was specified, or if no vector has been passed in which to
|
||||
put captured strings, ensure that nmatch is zero. This will stop any attempt to
|
||||
write to pmatch. */
|
||||
--
|
||||
2.21.0
|
||||
|
@@ -1,89 +0,0 @@
|
||||
From ec098f6b898334be0674dbadc9fd67a0532fa0eb Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sat, 10 Aug 2019 11:34:50 +0000
|
||||
Subject: [PATCH] Fix allusedtext bug, rightmost consulted character incorrect
|
||||
in negative lookaheads.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1157 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
---
|
||||
src/pcre2_match.c | 1 +
|
||||
testdata/testinput15 | 4 ++++
|
||||
testdata/testinput2 | 2 +-
|
||||
testdata/testoutput15 | 8 ++++++++
|
||||
testdata/testoutput2 | 2 +-
|
||||
|
||||
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||
index 26fc01e..4471183 100644
|
||||
--- a/src/pcre2_match.c
|
||||
+++ b/src/pcre2_match.c
|
||||
@@ -5971,6 +5971,7 @@ in rrc. */
|
||||
#define LBL(val) case val: goto L_RM##val;
|
||||
|
||||
RETURN_SWITCH:
|
||||
+if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
|
||||
if (Frdepth == 0) return rrc; /* Exit from the top level */
|
||||
F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
|
||||
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
|
||||
diff --git a/testdata/testinput15 b/testdata/testinput15
|
||||
index 2cb712d..5dd6897 100644
|
||||
--- a/testdata/testinput15
|
||||
+++ b/testdata/testinput15
|
||||
@@ -231,4 +231,8 @@
|
||||
/(*LIMIT_HEAP=21)\[(a)]{60}/expand
|
||||
\[a]{60}
|
||||
|
||||
+/b(?<!ax)(?!cx)/allusedtext
|
||||
+ abc
|
||||
+ abcz
|
||||
+
|
||||
# End of testinput15
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 4377f80..7b44fb0 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -4584,7 +4584,7 @@ B)x/alt_verbnames,mark
|
||||
|
||||
/abcd/null_context
|
||||
abcd\=null_context
|
||||
-\= Expect error
|
||||
+\= Expect error - not allowed together
|
||||
abcd\=null_context,find_limits
|
||||
abcd\=allusedtext,startchar
|
||||
|
||||
diff --git a/testdata/testoutput15 b/testdata/testoutput15
|
||||
index c51cda7..d854412 100644
|
||||
--- a/testdata/testoutput15
|
||||
+++ b/testdata/testoutput15
|
||||
@@ -525,4 +525,12 @@ No match
|
||||
\[a]{60}
|
||||
Failed: error -63: heap limit exceeded
|
||||
|
||||
+/b(?<!ax)(?!cx)/allusedtext
|
||||
+ abc
|
||||
+ 0: abc
|
||||
+ < >
|
||||
+ abcz
|
||||
+ 0: abcz
|
||||
+ < >>
|
||||
+
|
||||
# End of testinput15
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 0fd2187..0803d9e 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -14803,7 +14803,7 @@ No match
|
||||
/abcd/null_context
|
||||
abcd\=null_context
|
||||
0: abcd
|
||||
-\= Expect error
|
||||
+\= Expect error - not allowed together
|
||||
abcd\=null_context,find_limits
|
||||
** Not allowed together: find_limits null_context
|
||||
abcd\=allusedtext,startchar
|
||||
--
|
||||
2.21.0
|
||||
|
@@ -1,114 +0,0 @@
|
||||
From 007b635b6788f8317747842b02f9c85137277c20 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Thu, 18 Jul 2019 17:20:29 +0000
|
||||
Subject: [PATCH] Fix bug in recent patch for lookbehinds within lookaheads.
|
||||
Fixes ClusterFuzz 15933.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1138 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_compile.c | 22 +++++++++++++---------
|
||||
testdata/testinput2 | 3 +++
|
||||
testdata/testoutput2 | 4 ++++
|
||||
3 files changed, 20 insertions(+), 9 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index 2ae95ed..b68c154 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -136,7 +136,8 @@ static BOOL
|
||||
compile_block *);
|
||||
|
||||
static int
|
||||
- check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
|
||||
+ check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
|
||||
+ compile_block *);
|
||||
|
||||
|
||||
/*************************************************
|
||||
@@ -9004,7 +9005,7 @@ for (;; pptr++)
|
||||
|
||||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
- *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
|
||||
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
|
||||
if (*errcodeptr != 0) return -1;
|
||||
|
||||
/* Ignore any qualifiers that follow a lookahead assertion. */
|
||||
@@ -9326,15 +9327,17 @@ order to process any lookbehinds that they may contain. It stops when it hits a
|
||||
non-nested closing parenthesis in this case, returning a pointer to it.
|
||||
|
||||
Arguments
|
||||
- pptr points to where to start (start of pattern or start of lookahead)
|
||||
- retptr if not NULL, return the ket pointer here
|
||||
- cb points to the compile block
|
||||
+ pptr points to where to start (start of pattern or start of lookahead)
|
||||
+ retptr if not NULL, return the ket pointer here
|
||||
+ recurses chain of recurse_check to catch mutual recursion
|
||||
+ cb points to the compile block
|
||||
|
||||
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||
*/
|
||||
|
||||
static int
|
||||
-check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
|
||||
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
|
||||
+ parsed_recurse_check *recurses, compile_block *cb)
|
||||
{
|
||||
int errorcode = 0;
|
||||
int loopcount = 0;
|
||||
@@ -9449,7 +9452,8 @@ for (; *pptr != META_END; pptr++)
|
||||
|
||||
case META_LOOKBEHIND:
|
||||
case META_LOOKBEHINDNOT:
|
||||
- if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb))
|
||||
+ if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount,
|
||||
+ recurses, cb))
|
||||
return errorcode;
|
||||
break;
|
||||
}
|
||||
@@ -9899,7 +9903,7 @@ lengths. */
|
||||
|
||||
if (has_lookbehind)
|
||||
{
|
||||
- errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
|
||||
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
|
||||
if (errorcode != 0) goto HAD_CB_ERROR;
|
||||
}
|
||||
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index d85fc5f..1bfe591 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5600,4 +5600,7 @@ a)"xI
|
||||
/(?<=(?=.(?<=x)))/
|
||||
ab\=ph
|
||||
|
||||
+# Expect error (recursion => not fixed length)
|
||||
+/(\2)((?=(?<=\1)))/
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 6405e26..758b4db 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16952,6 +16952,10 @@ Failed: error 187 at offset 10: lookbehind assertion is too long
|
||||
ab\=ph
|
||||
No match
|
||||
|
||||
+# Expect error (recursion => not fixed length)
|
||||
+/(\2)((?=(?<=\1)))/
|
||||
+Failed: error 125 at offset 8: lookbehind assertion is not fixed length
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.20.1
|
||||
|
@@ -1,210 +0,0 @@
|
||||
From 8a5ce4c055808fd3a19b0da15e0e3caeb0ac3abb Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Mon, 26 Aug 2019 16:28:26 +0000
|
||||
Subject: [PATCH] Fix bug introduced in commit 1133. Lookbehinds that follow a
|
||||
condition were not always properly handled.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1160 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_compile.c | 12 +++++-
|
||||
testdata/testinput1 | 3 ++
|
||||
testdata/testinput2 | 10 +++++
|
||||
testdata/testoutput1 | 6 +++
|
||||
testdata/testoutput2 | 93 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
5 files changed, 122 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index b68c154..5cae730 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -9411,13 +9411,22 @@ for (; *pptr != META_END; pptr++)
|
||||
break;
|
||||
|
||||
case META_BACKREF_BYNAME:
|
||||
+ case META_RECURSE_BYNAME:
|
||||
+ pptr += 1 + SIZEOFFSET;
|
||||
+ break;
|
||||
+
|
||||
case META_COND_DEFINE:
|
||||
case META_COND_NAME:
|
||||
case META_COND_NUMBER:
|
||||
case META_COND_RNAME:
|
||||
case META_COND_RNUMBER:
|
||||
- case META_RECURSE_BYNAME:
|
||||
pptr += 1 + SIZEOFFSET;
|
||||
+ nestlevel++;
|
||||
+ break;
|
||||
+
|
||||
+ case META_COND_VERSION:
|
||||
+ pptr += 3;
|
||||
+ nestlevel++;
|
||||
break;
|
||||
|
||||
case META_CALLOUT_STRING:
|
||||
@@ -9438,7 +9447,6 @@ for (; *pptr != META_END; pptr++)
|
||||
break;
|
||||
|
||||
case META_CALLOUT_NUMBER:
|
||||
- case META_COND_VERSION:
|
||||
pptr += 3;
|
||||
break;
|
||||
|
||||
diff --git a/testdata/testinput1 b/testdata/testinput1
|
||||
index ee9354b..db1706b 100644
|
||||
--- a/testdata/testinput1
|
||||
+++ b/testdata/testinput1
|
||||
@@ -6371,4 +6371,7 @@ ef) x/x,mark
|
||||
/(?<=(?=(?<=a)))b/
|
||||
ab
|
||||
|
||||
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
|
||||
+ abc
|
||||
+
|
||||
# End of testinput1
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 2b1aced..a5f59b9 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5611,4 +5611,14 @@ a)"xI
|
||||
/\A\s*((?:[^`]{28500}){4}|a)/I
|
||||
a
|
||||
|
||||
+/(?<A>a)(?(<A>)b)((?<=b).*)/B
|
||||
+
|
||||
+/(?(1)b)((?<=b).*)/B
|
||||
+
|
||||
+/(?(R1)b)((?<=b).*)/B
|
||||
+
|
||||
+/(?(DEFINE)b)((?<=b).*)/B
|
||||
+
|
||||
+/(?(VERSION=10.4)b)((?<=b).*)/B
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
||||
index c9bfea8..3f872e7 100644
|
||||
--- a/testdata/testoutput1
|
||||
+++ b/testdata/testoutput1
|
||||
@@ -10090,4 +10090,10 @@ No match
|
||||
ab
|
||||
0: b
|
||||
|
||||
+/^(?<A>a)(?(<A>)b)((?<=b).*)$/
|
||||
+ abc
|
||||
+ 0: abc
|
||||
+ 1: a
|
||||
+ 2: c
|
||||
+
|
||||
# End of testinput1
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index d2415c3..6e41ed8 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16983,6 +16983,99 @@ Subject length lower bound = 1
|
||||
0: a
|
||||
1: a
|
||||
|
||||
+/(?<A>a)(?(<A>)b)((?<=b).*)/B
|
||||
+------------------------------------------------------------------
|
||||
+ Bra
|
||||
+ CBra 1
|
||||
+ a
|
||||
+ Ket
|
||||
+ Cond
|
||||
+ 1 Cond ref
|
||||
+ b
|
||||
+ Ket
|
||||
+ CBra 2
|
||||
+ AssertB
|
||||
+ Reverse
|
||||
+ b
|
||||
+ Ket
|
||||
+ Any*+
|
||||
+ Ket
|
||||
+ Ket
|
||||
+ End
|
||||
+------------------------------------------------------------------
|
||||
+
|
||||
+/(?(1)b)((?<=b).*)/B
|
||||
+------------------------------------------------------------------
|
||||
+ Bra
|
||||
+ Cond
|
||||
+ 1 Cond ref
|
||||
+ b
|
||||
+ Ket
|
||||
+ CBra 1
|
||||
+ AssertB
|
||||
+ Reverse
|
||||
+ b
|
||||
+ Ket
|
||||
+ Any*+
|
||||
+ Ket
|
||||
+ Ket
|
||||
+ End
|
||||
+------------------------------------------------------------------
|
||||
+
|
||||
+/(?(R1)b)((?<=b).*)/B
|
||||
+------------------------------------------------------------------
|
||||
+ Bra
|
||||
+ Cond
|
||||
+ Cond recurse 1
|
||||
+ b
|
||||
+ Ket
|
||||
+ CBra 1
|
||||
+ AssertB
|
||||
+ Reverse
|
||||
+ b
|
||||
+ Ket
|
||||
+ Any*+
|
||||
+ Ket
|
||||
+ Ket
|
||||
+ End
|
||||
+------------------------------------------------------------------
|
||||
+
|
||||
+/(?(DEFINE)b)((?<=b).*)/B
|
||||
+------------------------------------------------------------------
|
||||
+ Bra
|
||||
+ Cond
|
||||
+ Cond false
|
||||
+ b
|
||||
+ Ket
|
||||
+ CBra 1
|
||||
+ AssertB
|
||||
+ Reverse
|
||||
+ b
|
||||
+ Ket
|
||||
+ Any*+
|
||||
+ Ket
|
||||
+ Ket
|
||||
+ End
|
||||
+------------------------------------------------------------------
|
||||
+
|
||||
+/(?(VERSION=10.4)b)((?<=b).*)/B
|
||||
+------------------------------------------------------------------
|
||||
+ Bra
|
||||
+ Cond
|
||||
+ Cond false
|
||||
+ b
|
||||
+ Ket
|
||||
+ CBra 1
|
||||
+ AssertB
|
||||
+ Reverse
|
||||
+ b
|
||||
+ Ket
|
||||
+ Any*+
|
||||
+ Ket
|
||||
+ Ket
|
||||
+ End
|
||||
+------------------------------------------------------------------
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.21.0
|
||||
|
@@ -1,42 +0,0 @@
|
||||
From 6809752eacde104d45c5e11c3c64165857200ce7 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Mon, 13 May 2019 16:26:17 +0000
|
||||
Subject: [PATCH 1/2] Fix crash when \X is used without UTF in JIT.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1091 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
---
|
||||
testdata/testinput4 | 3 +++
|
||||
testdata/testoutput4 | 4 ++++
|
||||
|
||||
diff --git a/testdata/testinput4 b/testdata/testinput4
|
||||
index cccab0e..f3d498c 100644
|
||||
--- a/testdata/testinput4
|
||||
+++ b/testdata/testinput4
|
||||
@@ -2480,4 +2480,7 @@
|
||||
/^(?'אABC'...)(?&אABC)/utf
|
||||
123123123456
|
||||
|
||||
+/\X*/
|
||||
+ \xF3aaa\xE4\xEA\xEB\xFEa
|
||||
+
|
||||
# End of testinput4
|
||||
diff --git a/testdata/testoutput4 b/testdata/testoutput4
|
||||
index 84b8b9e..53926ed 100644
|
||||
--- a/testdata/testoutput4
|
||||
+++ b/testdata/testoutput4
|
||||
@@ -4012,4 +4012,8 @@ No match
|
||||
0: 123123
|
||||
1: 123
|
||||
|
||||
+/\X*/
|
||||
+ \xF3aaa\xE4\xEA\xEB\xFEa
|
||||
+ 0: \xf3aaa\xe4\xea\xeb\xfea
|
||||
+
|
||||
# End of testinput4
|
||||
--
|
||||
2.20.1
|
||||
|
@@ -1,70 +0,0 @@
|
||||
From 00acf0e2f3f01a3057fc099c60e4f530d744619b Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Wed, 16 Oct 2019 17:12:13 +0000
|
||||
Subject: [PATCH] Fix error offset bug introduced at 1176.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_match.c | 4 ++++
|
||||
testdata/testinput10 | 6 ++++++
|
||||
testdata/testoutput10 | 10 ++++++++++
|
||||
3 files changed, 20 insertions(+)
|
||||
|
||||
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||
index 9f34e20..cca4d3a 100644
|
||||
--- a/src/pcre2_match.c
|
||||
+++ b/src/pcre2_match.c
|
||||
@@ -6186,6 +6186,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
}
|
||||
match_data->subject = NULL;
|
||||
|
||||
+/* Zero the error offset in case the first code unit is invalid UTF. */
|
||||
+
|
||||
+match_data->startchar = 0;
|
||||
+
|
||||
/* If the pattern was successfully studied with JIT support, run the JIT
|
||||
executable instead of the rest of this function. Most options must be set at
|
||||
compile time for the JIT code to be usable. Fallback to the normal code path if
|
||||
diff --git a/testdata/testinput10 b/testdata/testinput10
|
||||
index 4399f82..19d2f2f 100644
|
||||
--- a/testdata/testinput10
|
||||
+++ b/testdata/testinput10
|
||||
@@ -493,4 +493,10 @@
|
||||
|
||||
/(?(á/utf
|
||||
|
||||
+/x/utf
|
||||
+ abxyz
|
||||
+ \x80\=startchar
|
||||
+ abc\x80\=startchar
|
||||
+ abc\x80\=startchar,offset=3
|
||||
+
|
||||
# End of testinput10
|
||||
diff --git a/testdata/testoutput10 b/testdata/testoutput10
|
||||
index dfecda1..dd91c45 100644
|
||||
--- a/testdata/testoutput10
|
||||
+++ b/testdata/testoutput10
|
||||
@@ -1651,4 +1651,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat
|
||||
/(?(á/utf
|
||||
Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
|
||||
|
||||
+/x/utf
|
||||
+ abxyz
|
||||
+ 0: x
|
||||
+ \x80\=startchar
|
||||
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
|
||||
+ abc\x80\=startchar
|
||||
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
|
||||
+ abc\x80\=startchar,offset=3
|
||||
+Error -36 (bad UTF-8 offset)
|
||||
+
|
||||
# End of testinput10
|
||||
--
|
||||
2.21.0
|
||||
|
@ -1,134 +0,0 @@
|
||||
From 4c3e518bff94e5f206a63e3a1e5d7e570402786b Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sat, 3 Aug 2019 08:30:40 +0000
|
||||
Subject: [PATCH] Fix incorrect computation of group length when one branch
|
||||
exceeded 65535.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1155 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_study.c | 18 ++++++++++++------
|
||||
testdata/testinput2 | 8 ++++++++
|
||||
testdata/testoutput2 | 27 +++++++++++++++++++++++++++
|
||||
3 files changed, 47 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_study.c b/src/pcre2_study.c
|
||||
index e883c2e..cb5e7f1 100644
|
||||
--- a/src/pcre2_study.c
|
||||
+++ b/src/pcre2_study.c
|
||||
@@ -103,6 +103,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
int *backref_cache)
|
||||
{
|
||||
int length = -1;
|
||||
+int branchlength = 0;
|
||||
int prev_cap_recno = -1;
|
||||
int prev_cap_d = 0;
|
||||
int prev_recurse_recno = -1;
|
||||
@@ -110,9 +111,9 @@ int prev_recurse_d = 0;
|
||||
uint32_t once_fudge = 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
|
||||
-recurse_check this_recurse;
|
||||
-int branchlength = 0;
|
||||
+PCRE2_SPTR nextbranch = code + GET(code, 1);
|
||||
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||
+recurse_check this_recurse;
|
||||
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
@@ -128,16 +129,20 @@ if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
-passes 16-bits, stop. */
|
||||
+passes 16-bits, reset to that value and skip the rest of the branch. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min, recno;
|
||||
- PCRE2_UCHAR *cs, *ce;
|
||||
- PCRE2_UCHAR op = *cc;
|
||||
+ PCRE2_UCHAR op, *cs, *ce;
|
||||
|
||||
- if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||
+ if (branchlength >= UINT16_MAX)
|
||||
+ {
|
||||
+ branchlength = UINT16_MAX;
|
||||
+ cc = (PCRE2_UCHAR *)nextbranch;
|
||||
+ }
|
||||
|
||||
+ op = *cc;
|
||||
switch (op)
|
||||
{
|
||||
case OP_COND:
|
||||
@@ -227,6 +232,7 @@ for (;;)
|
||||
if (length < 0 || (!had_recurse && branchlength < length))
|
||||
length = branchlength;
|
||||
if (op != OP_ALT) return length;
|
||||
+ nextbranch = cc + GET(cc, 1);
|
||||
cc += 1 + LINK_SIZE;
|
||||
branchlength = 0;
|
||||
had_recurse = FALSE;
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 1bfe591..384239a 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5603,4 +5603,12 @@ a)"xI
|
||||
# Expect error (recursion => not fixed length)
|
||||
/(\2)((?=(?<=\1)))/
|
||||
|
||||
+/\A\s*(a|(?:[^`]{28500}){4})/I
|
||||
+ a
|
||||
+
|
||||
+/\A\s*((?:[^`]{28500}){4})/I
|
||||
+
|
||||
+/\A\s*((?:[^`]{28500}){4}|a)/I
|
||||
+ a
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 758b4db..0983741 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16956,6 +16956,33 @@ No match
|
||||
/(\2)((?=(?<=\1)))/
|
||||
Failed: error 125 at offset 8: lookbehind assertion is not fixed length
|
||||
|
||||
+/\A\s*(a|(?:[^`]{28500}){4})/I
|
||||
+Capture group count = 1
|
||||
+Max lookbehind = 1
|
||||
+Compile options: <none>
|
||||
+Overall options: anchored
|
||||
+Subject length lower bound = 1
|
||||
+ a
|
||||
+ 0: a
|
||||
+ 1: a
|
||||
+
|
||||
+/\A\s*((?:[^`]{28500}){4})/I
|
||||
+Capture group count = 1
|
||||
+Max lookbehind = 1
|
||||
+Compile options: <none>
|
||||
+Overall options: anchored
|
||||
+Subject length lower bound = 65535
|
||||
+
|
||||
+/\A\s*((?:[^`]{28500}){4}|a)/I
|
||||
+Capture group count = 1
|
||||
+Max lookbehind = 1
|
||||
+Compile options: <none>
|
||||
+Overall options: anchored
|
||||
+Subject length lower bound = 1
|
||||
+ a
|
||||
+ 0: a
|
||||
+ 1: a
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,217 +0,0 @@
|
||||
From 44c8382acfe0902b302e0d7a5b1c6d9ee9226a51 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Tue, 16 Jul 2019 15:06:21 +0000
|
||||
Subject: [PATCH] Fix lookbehind within lookahead within lookbehind
|
||||
misbehaviour bug.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
src/pcre2_compile.c | 58 +++++++++++++++++++++++++++++---------------
|
||||
testdata/testinput1 | 6 +++++
|
||||
testdata/testinput2 | 3 +++
|
||||
testdata/testoutput1 | 9 +++++++
|
||||
testdata/testoutput2 | 4 +++
|
||||
5 files changed, 61 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index f6e0a0b..2ae95ed 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -135,6 +135,8 @@ static BOOL
|
||||
set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *,
|
||||
compile_block *);
|
||||
|
||||
+static int
|
||||
+ check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
|
||||
|
||||
|
||||
/*************************************************
|
||||
@@ -8997,15 +8999,15 @@ for (;; pptr++)
|
||||
}
|
||||
break;
|
||||
|
||||
- /* Lookaheads can be ignored, but we must start the skip inside the group
|
||||
- so that it isn't treated as a group within the branch. */
|
||||
+ /* Lookaheads do not contribute to the length of this branch, but they may
|
||||
+ contain lookbehinds within them whose lengths need to be set. */
|
||||
|
||||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
- pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||
- if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
|
||||
+ if (*errcodeptr != 0) return -1;
|
||||
|
||||
- /* Also ignore any qualifiers that follow a lookahead assertion. */
|
||||
+ /* Ignore any qualifiers that follow a lookahead assertion. */
|
||||
|
||||
switch (pptr[1])
|
||||
{
|
||||
@@ -9319,20 +9321,28 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
|
||||
the error offset is marked unset. The enables the functions above not to
|
||||
override settings from deeper nestings.
|
||||
|
||||
-Arguments cb points to the compile block
|
||||
-Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||
+This function is called recursively from get_branchlength() for lookaheads in
|
||||
+order to process any lookbehinds that they may contain. It stops when it hits a
|
||||
+non-nested closing parenthesis in this case, returning a pointer to it.
|
||||
+
|
||||
+Arguments
|
||||
+ pptr points to where to start (start of pattern or start of lookahead)
|
||||
+ retptr if not NULL, return the ket pointer here
|
||||
+ cb points to the compile block
|
||||
+
|
||||
+Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||
*/
|
||||
|
||||
static int
|
||||
-check_lookbehinds(compile_block *cb)
|
||||
+check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
|
||||
{
|
||||
-uint32_t *pptr;
|
||||
int errorcode = 0;
|
||||
int loopcount = 0;
|
||||
+int nestlevel = 0;
|
||||
|
||||
cb->erroroffset = PCRE2_UNSET;
|
||||
|
||||
-for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||
+for (; *pptr != META_END; pptr++)
|
||||
{
|
||||
if (*pptr < META_END) continue; /* Literal */
|
||||
|
||||
@@ -9346,14 +9356,30 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||
pptr += 1;
|
||||
break;
|
||||
|
||||
+ case META_KET:
|
||||
+ if (--nestlevel < 0)
|
||||
+ {
|
||||
+ if (retptr != NULL) *retptr = pptr;
|
||||
+ return 0;
|
||||
+ }
|
||||
+ break;
|
||||
+
|
||||
+ case META_ATOMIC:
|
||||
+ case META_CAPTURE:
|
||||
+ case META_COND_ASSERT:
|
||||
+ case META_LOOKAHEAD:
|
||||
+ case META_LOOKAHEADNOT:
|
||||
+ case META_NOCAPTURE:
|
||||
+ case META_SCRIPT_RUN:
|
||||
+ nestlevel++;
|
||||
+ break;
|
||||
+
|
||||
case META_ACCEPT:
|
||||
case META_ALT:
|
||||
case META_ASTERISK:
|
||||
case META_ASTERISK_PLUS:
|
||||
case META_ASTERISK_QUERY:
|
||||
- case META_ATOMIC:
|
||||
case META_BACKREF:
|
||||
- case META_CAPTURE:
|
||||
case META_CIRCUMFLEX:
|
||||
case META_CLASS:
|
||||
case META_CLASS_EMPTY:
|
||||
@@ -9361,14 +9387,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||
case META_CLASS_END:
|
||||
case META_CLASS_NOT:
|
||||
case META_COMMIT:
|
||||
- case META_COND_ASSERT:
|
||||
case META_DOLLAR:
|
||||
case META_DOT:
|
||||
case META_FAIL:
|
||||
- case META_KET:
|
||||
- case META_LOOKAHEAD:
|
||||
- case META_LOOKAHEADNOT:
|
||||
- case META_NOCAPTURE:
|
||||
case META_PLUS:
|
||||
case META_PLUS_PLUS:
|
||||
case META_PLUS_QUERY:
|
||||
@@ -9378,7 +9399,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||
case META_QUERY_QUERY:
|
||||
case META_RANGE_ESCAPED:
|
||||
case META_RANGE_LITERAL:
|
||||
- case META_SCRIPT_RUN:
|
||||
case META_SKIP:
|
||||
case META_THEN:
|
||||
break;
|
||||
@@ -9879,7 +9899,7 @@ lengths. */
|
||||
|
||||
if (has_lookbehind)
|
||||
{
|
||||
- errorcode = check_lookbehinds(&cb);
|
||||
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
|
||||
if (errorcode != 0) goto HAD_CB_ERROR;
|
||||
}
|
||||
|
||||
diff --git a/testdata/testinput1 b/testdata/testinput1
|
||||
index 4d9ec5a..ee9354b 100644
|
||||
--- a/testdata/testinput1
|
||||
+++ b/testdata/testinput1
|
||||
@@ -6365,4 +6365,10 @@ ef) x/x,mark
|
||||
/(?(DEFINE)(a|ab))(?1){1}+c/
|
||||
abc
|
||||
|
||||
+/(?<=(?=.(?<=x)))/aftertext
|
||||
+ abx
|
||||
+
|
||||
+/(?<=(?=(?<=a)))b/
|
||||
+ ab
|
||||
+
|
||||
# End of testinput1
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 9412bf6..d85fc5f 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -5597,4 +5597,7 @@ a)"xI
|
||||
# Multiplication overflow
|
||||
/(X{65535})(?<=\1{32770})/
|
||||
|
||||
+/(?<=(?=.(?<=x)))/
|
||||
+ ab\=ph
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
||||
index fffb8ec..c9bfea8 100644
|
||||
--- a/testdata/testoutput1
|
||||
+++ b/testdata/testoutput1
|
||||
@@ -10081,4 +10081,13 @@ No match
|
||||
abc
|
||||
No match
|
||||
|
||||
+/(?<=(?=.(?<=x)))/aftertext
|
||||
+ abx
|
||||
+ 0:
|
||||
+ 0+ x
|
||||
+
|
||||
+/(?<=(?=(?<=a)))b/
|
||||
+ ab
|
||||
+ 0: b
|
||||
+
|
||||
# End of testinput1
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 950095f..6405e26 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -16948,6 +16948,10 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
|
||||
/(X{65535})(?<=\1{32770})/
|
||||
Failed: error 187 at offset 10: lookbehind assertion is too long
|
||||
|
||||
+/(?<=(?=.(?<=x)))/
|
||||
+ ab\=ph
|
||||
+No match
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,130 +0,0 @@
|
||||
From 427e9b2fffc46d6b49a31df34c8b120bffc2ea60 Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Wed, 26 Jun 2019 16:13:28 +0000
|
||||
Subject: [PATCH] Fix partial matching bug in pcre2_dfa_match().
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1123 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
---
|
||||
src/pcre2_dfa_match.c | 11 ++++-----
|
||||
testdata/testinput6 | 22 +++++++++++++++++
|
||||
testdata/testoutput6 | 36 ++++++++++++++++++++++++++++
|
||||
|
||||
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
|
||||
index 911e9b9..538d15d 100644
|
||||
--- a/src/pcre2_dfa_match.c
|
||||
+++ b/src/pcre2_dfa_match.c
|
||||
@@ -3152,8 +3152,8 @@ for (;;)
|
||||
|
||||
/* We have finished the processing at the current subject character. If no
|
||||
new states have been set for the next character, we have found all the
|
||||
- matches that we are going to find. If we are at the top level and partial
|
||||
- matching has been requested, check for appropriate conditions.
|
||||
+ matches that we are going to find. If partial matching has been requested,
|
||||
+ check for appropriate conditions.
|
||||
|
||||
The "forced_ fail" variable counts the number of (*F) encountered for the
|
||||
character. If it is equal to the original active_count (saved in
|
||||
@@ -3165,8 +3165,7 @@ for (;;)
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
- if (rlevel == 1 && /* Top level, and */
|
||||
- could_continue && /* Some could go on, and */
|
||||
+ if (could_continue && /* Some could go on, and */
|
||||
forced_fail != workspace[1] && /* Not all forced fail & */
|
||||
( /* either... */
|
||||
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|
||||
@@ -3175,8 +3174,8 @@ for (;;)
|
||||
match_count < 0) /* no matches */
|
||||
) && /* And... */
|
||||
(
|
||||
- partial_newline || /* Either partial NL */
|
||||
- ( /* or ... */
|
||||
+ partial_newline || /* Either partial NL */
|
||||
+ ( /* or ... */
|
||||
ptr >= end_subject && /* End of subject and */
|
||||
ptr > mb->start_used_ptr) /* Inspected non-empty string */
|
||||
)
|
||||
diff --git a/testdata/testinput6 b/testdata/testinput6
|
||||
index 403e3fa..cc3ebd0 100644
|
||||
--- a/testdata/testinput6
|
||||
+++ b/testdata/testinput6
|
||||
@@ -4972,4 +4972,26 @@
|
||||
\= Expect no match
|
||||
0
|
||||
|
||||
+/(?<=pqr)abc(?=xyz)/
|
||||
+ 123pqrabcxy\=ps,allusedtext
|
||||
+ 123pqrabcxyz\=ps,allusedtext
|
||||
+
|
||||
+/(?>a+b)/
|
||||
+ aaaa\=ps
|
||||
+ aaaab\=ps
|
||||
+
|
||||
+/(abc)(?1)/
|
||||
+ abca\=ps
|
||||
+ abcabc\=ps
|
||||
+
|
||||
+/(?(?=abc).*|Z)/
|
||||
+ ab\=ps
|
||||
+ abcxyz\=ps
|
||||
+
|
||||
+/(abc)++x/
|
||||
+ abcab\=ps
|
||||
+ abc\=ps
|
||||
+ ab\=ps
|
||||
+ abcx
|
||||
+
|
||||
# End of testinput6
|
||||
diff --git a/testdata/testoutput6 b/testdata/testoutput6
|
||||
index 6a975dd..61cbfe2 100644
|
||||
--- a/testdata/testoutput6
|
||||
+++ b/testdata/testoutput6
|
||||
@@ -7809,4 +7809,40 @@ No match
|
||||
0
|
||||
No match
|
||||
|
||||
+/(?<=pqr)abc(?=xyz)/
|
||||
+ 123pqrabcxy\=ps,allusedtext
|
||||
+Partial match: pqrabcxy
|
||||
+ <<<
|
||||
+ 123pqrabcxyz\=ps,allusedtext
|
||||
+ 0: pqrabcxyz
|
||||
+ <<< >>>
|
||||
+
|
||||
+/(?>a+b)/
|
||||
+ aaaa\=ps
|
||||
+Partial match: aaaa
|
||||
+ aaaab\=ps
|
||||
+ 0: aaaab
|
||||
+
|
||||
+/(abc)(?1)/
|
||||
+ abca\=ps
|
||||
+Partial match: abca
|
||||
+ abcabc\=ps
|
||||
+ 0: abcabc
|
||||
+
|
||||
+/(?(?=abc).*|Z)/
|
||||
+ ab\=ps
|
||||
+Partial match: ab
|
||||
+ abcxyz\=ps
|
||||
+ 0: abcxyz
|
||||
+
|
||||
+/(abc)++x/
|
||||
+ abcab\=ps
|
||||
+Partial match: abcab
|
||||
+ abc\=ps
|
||||
+Partial match: abc
|
||||
+ ab\=ps
|
||||
+Partial match: ab
|
||||
+ abcx
|
||||
+ 0: abcx
|
||||
+
|
||||
# End of testinput6
|
||||
--
|
||||
2.20.1
|
||||
|
@ -1,382 +0,0 @@
|
||||
From e29388de53ea3a4f9d1c6b4932613681493ac9dc Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||
Date: Sat, 15 Jun 2019 15:51:07 +0000
|
||||
Subject: [PATCH] Fix pcre2grep -o bug when ovector overflows; add option to
|
||||
adjust the limit; raise the default limit; give error if -o requests an
|
||||
uncaptured parens.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1106 6239d852-aaf2-0410-a92c-79f79f948069
|
||||
Petr Písař: Ported to 10.33.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
RunGrepTest | 7 ++++++
|
||||
doc/html/pcre2api.html | 12 +++++-----
|
||||
doc/html/pcre2grep.html | 28 +++++++++++++++-------
|
||||
doc/html/pcre2test.html | 4 +++-
|
||||
doc/pcre2grep.1 | 26 +++++++++++++-------
|
||||
doc/pcre2grep.txt | 43 ++++++++++++++++++++-------------
|
||||
doc/pcre2test.txt | 4 +++-
|
||||
src/pcre2grep.c | 53 ++++++++++++++++++++++++++++-------------
|
||||
testdata/grepoutput | 7 ++++++
|
||||
9 files changed, 126 insertions(+), 58 deletions(-)
|
||||
|
||||
diff --git a/RunGrepTest b/RunGrepTest
|
||||
index bac1f1b..ea37f70 100755
|
||||
--- a/RunGrepTest
|
||||
+++ b/RunGrepTest
|
||||
@@ -653,6 +653,13 @@ printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
|
||||
$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
+echo "---------------------------- Test 127 -----------------------------" >>testtrygrep
|
||||
+(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep
|
||||
+echo "RC=$?" >>testtrygrep
|
||||
+
|
||||
+echo "---------------------------- Test 128 -----------------------------" >>testtrygrep
|
||||
+(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
|
||||
+echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
|
||||
index 7ca39f5..84f4442 100644
|
||||
--- a/doc/html/pcre2api.html
|
||||
+++ b/doc/html/pcre2api.html
|
||||
@@ -2252,12 +2252,12 @@ segment.
|
||||
PCRE2_INFO_MINLENGTH
|
||||
</pre>
|
||||
If a minimum length for matching subject strings was computed, its value is
|
||||
-returned. Otherwise the returned value is 0. The value is a number of
|
||||
-characters, which in UTF mode may be different from the number of code units.
|
||||
-The third argument should point to an <b>uint32_t</b> variable. The value is a
|
||||
-lower bound to the length of any matching string. There may not be any strings
|
||||
-of that length that do actually match, but every string that does match is at
|
||||
-least that long.
|
||||
+returned. Otherwise the returned value is 0. This value is not computed when
|
||||
+PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in
|
||||
+UTF mode may be different from the number of code units. The third argument
|
||||
+should point to an <b>uint32_t</b> variable. The value is a lower bound to the
|
||||
+length of any matching string. There may not be any strings of that length that
|
||||
+do actually match, but every string that does match is at least that long.
|
||||
<pre>
|
||||
PCRE2_INFO_NAMECOUNT
|
||||
PCRE2_INFO_NAMEENTRYSIZE
|
||||
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
|
||||
index d66cee3..de699e7 100644
|
||||
--- a/doc/html/pcre2grep.html
|
||||
+++ b/doc/html/pcre2grep.html
|
||||
@@ -685,20 +685,32 @@ otherwise empty line. This option is mutually exclusive with <b>--output</b>,
|
||||
<P>
|
||||
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
||||
Show only the part of the line that matched the capturing parentheses of the
|
||||
-given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||
-equivalent to <b>-o</b> without a number. Because these options can be given
|
||||
-without an argument (see above), if an argument is present, it must be given in
|
||||
-the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||
-for the non-argument case above also apply to this option. If the specified
|
||||
-capturing parentheses do not exist in the pattern, or were not set in the
|
||||
-match, nothing is output unless the file name or line number are being output.
|
||||
+given number. Up to 50 capturing parentheses are supported by default. This
|
||||
+limit can be changed via the <b>--om-capture</b> option. A pattern may contain
|
||||
+any number of capturing parentheses, but only those whose number is within the
|
||||
+limit can be accessed by <b>-o</b>. An error occurs if the number specified by
|
||||
+<b>-o</b> is greater than the limit.
|
||||
+<br>
|
||||
+<br>
|
||||
+-o0 is the same as <b>-o</b> without a number. Because these options can be
|
||||
+given without an argument (see above), if an argument is present, it must be
|
||||
+given in the same shell item, for example, -o3 or --only-matching=2. The
|
||||
+comments given for the non-argument case above also apply to this option. If
|
||||
+the specified capturing parentheses do not exist in the pattern, or were not
|
||||
+set in the match, nothing is output unless the file name or line number are
|
||||
+being output.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given multiple times, multiple substrings are output for each
|
||||
match, in the order the options are given, and all on one line. For example,
|
||||
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator (but see the next
|
||||
-option).
|
||||
+but one option).
|
||||
+</P>
|
||||
+<P>
|
||||
+<b>--om-capture</b>=<i>number</i>
|
||||
+Set the number of capturing parentheses that can be accessed by <b>-o</b>. The
|
||||
+default is 50.
|
||||
</P>
|
||||
<P>
|
||||
<b>--om-separator</b>=<i>text</i>
|
||||
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
|
||||
index 083d5cc..4be47c6 100644
|
||||
--- a/doc/html/pcre2test.html
|
||||
+++ b/doc/html/pcre2test.html
|
||||
@@ -738,7 +738,9 @@ options, the line is omitted. "First code unit" is where any match must start;
|
||||
if there is more than one they are listed as "starting code units". "Last code
|
||||
unit" is the last literal code unit that must be present in any match. This is
|
||||
not necessarily the last character. These lines are omitted if no starting or
|
||||
-ending code units are recorded.
|
||||
+ending code units are recorded. The subject length line is omitted when
|
||||
+<b>no_start_optimize</b> is set because the minimum length is not calculated
|
||||
+when it can never be used.
|
||||
</P>
|
||||
<P>
|
||||
The <b>framesize</b> modifier shows the size, in bytes, of the storage frames
|
||||
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
|
||||
index 6b3219b..1dcdb68 100644
|
||||
--- a/doc/pcre2grep.1
|
||||
+++ b/doc/pcre2grep.1
|
||||
@@ -596,19 +596,29 @@ otherwise empty line. This option is mutually exclusive with \fB--output\fP,
|
||||
.TP
|
||||
\fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP
|
||||
Show only the part of the line that matched the capturing parentheses of the
|
||||
-given number. Up to 32 capturing parentheses are supported, and -o0 is
|
||||
-equivalent to \fB-o\fP without a number. Because these options can be given
|
||||
-without an argument (see above), if an argument is present, it must be given in
|
||||
-the same shell item, for example, -o3 or --only-matching=2. The comments given
|
||||
-for the non-argument case above also apply to this option. If the specified
|
||||
-capturing parentheses do not exist in the pattern, or were not set in the
|
||||
-match, nothing is output unless the file name or line number are being output.
|
||||
+given number. Up to 50 capturing parentheses are supported by default. This
|
||||
+limit can be changed via the \fB--om-capture\fP option. A pattern may contain
|
||||
+any number of capturing parentheses, but only those whose number is within the
|
||||
+limit can be accessed by \fB-o\fP. An error occurs if the number specified by
|
||||
+\fB-o\fP is greater than the limit.
|
||||
+.sp
|
||||
+-o0 is the same as \fB-o\fP without a number. Because these options can be
|
||||
+given without an argument (see above), if an argument is present, it must be
|
||||
+given in the same shell item, for example, -o3 or --only-matching=2. The
|
||||
+comments given for the non-argument case above also apply to this option. If
|
||||
+the specified capturing parentheses do not exist in the pattern, or were not
|
||||
+set in the match, nothing is output unless the file name or line number are
|
||||
+being output.
|
||||
.sp
|
||||
If this option is given multiple times, multiple substrings are output for each
|
||||
match, in the order the options are given, and all on one line. For example,
|
||||
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator (but see the next
|
||||
-option).
|
||||
+but one option).
|
||||
+.TP
|
||||
+\fB--om-capture\fP=\fInumber\fP
|
||||
+Set the number of capturing parentheses that can be accessed by \fB-o\fP. The
|
||||
+default is 50.
|
||||
.TP
|
||||
\fB--om-separator\fP=\fItext\fP
|
||||
Specify a separating string for multiple occurrences of \fB-o\fP. The default
|
||||
diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
|
||||
index cd44fe0..2920643 100644
|
||||
--- a/doc/pcre2grep.txt
|
||||
+++ b/doc/pcre2grep.txt
|
||||
@@ -662,23 +662,32 @@ OPTIONS
|
||||
|
||||
-onumber, --only-matching=number
|
||||
Show only the part of the line that matched the capturing
|
||||
- parentheses of the given number. Up to 32 capturing parenthe-
|
||||
- ses are supported, and -o0 is equivalent to -o without a num-
|
||||
- ber. Because these options can be given without an argument
|
||||
- (see above), if an argument is present, it must be given in
|
||||
- the same shell item, for example, -o3 or --only-matching=2.
|
||||
- The comments given for the non-argument case above also apply
|
||||
- to this option. If the specified capturing parentheses do not
|
||||
- exist in the pattern, or were not set in the match, nothing
|
||||
- is output unless the file name or line number are being out-
|
||||
- put.
|
||||
-
|
||||
- If this option is given multiple times, multiple substrings
|
||||
- are output for each match, in the order the options are
|
||||
- given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||
- the substrings matched by capturing parentheses 3 and 1 and
|
||||
- then 3 again to be output. By default, there is no separator
|
||||
- (but see the next option).
|
||||
+ parentheses of the given number. Up to 50 capturing parenthe-
|
||||
+ ses are supported by default. This limit can be changed via
|
||||
+ the --om-capture option. A pattern may contain any number of
|
||||
+ capturing parentheses, but only those whose number is within
|
||||
+ the limit can be accessed by -o. An error occurs if the num-
|
||||
+ ber specified by -o is greater than the limit.
|
||||
+
|
||||
+ -o0 is the same as -o without a number. Because these options
|
||||
+ can be given without an argument (see above), if an argument
|
||||
+ is present, it must be given in the same shell item, for
|
||||
+ example, -o3 or --only-matching=2. The comments given for the
|
||||
+ non-argument case above also apply to this option. If the
|
||||
+ specified capturing parentheses do not exist in the pattern,
|
||||
+ or were not set in the match, nothing is output unless the
|
||||
+ file name or line number are being output.
|
||||
+
|
||||
+ If this option is given multiple times, multiple substrings
|
||||
+ are output for each match, in the order the options are
|
||||
+ given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||
+ the substrings matched by capturing parentheses 3 and 1 and
|
||||
+ then 3 again to be output. By default, there is no separator
|
||||
+ (but see the next but one option).
|
||||
+
|
||||
+ --om-capture=number
|
||||
+ Set the number of capturing parentheses that can be accessed
|
||||
+ by -o. The default is 50.
|
||||
|
||||
--om-separator=text
|
||||
Specify a separating string for multiple occurrences of -o.
|
||||
diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
|
||||
index cbe3528..f287f6d 100644
|
||||
--- a/doc/pcre2test.txt
|
||||
+++ b/doc/pcre2test.txt
|
||||
@@ -669,7 +669,9 @@ PATTERN MODIFIERS
|
||||
as "starting code units". "Last code unit" is the last literal code
|
||||
unit that must be present in any match. This is not necessarily the
|
||||
last character. These lines are omitted if no starting or ending code
|
||||
- units are recorded.
|
||||
+ units are recorded. The subject length line is omitted when
|
||||
+ no_start_optimize is set because the minimum length is not calculated
|
||||
+ when it can never be used.
|
||||
|
||||
The framesize modifier shows the size, in bytes, of the storage frames
|
||||
used by pcre2_match() for handling backtracking. The size depends on
|
||||
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
|
||||
index a3cc3ec..d17cd2a 100644
|
||||
--- a/src/pcre2grep.c
|
||||
+++ b/src/pcre2grep.c
|
||||
@@ -115,7 +115,7 @@ MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
|
||||
|
||||
typedef int BOOL;
|
||||
|
||||
-#define OFFSET_SIZE 33
|
||||
+#define DEFAULT_CAPTURE_MAX 50
|
||||
|
||||
#if BUFSIZ > 8192
|
||||
#define MAXPATLEN BUFSIZ
|
||||
@@ -242,6 +242,8 @@ static pcre2_compile_context *compile_context;
|
||||
static pcre2_match_context *match_context;
|
||||
static pcre2_match_data *match_data;
|
||||
static PCRE2_SIZE *offsets;
|
||||
+static uint32_t offset_size;
|
||||
+static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
|
||||
|
||||
static BOOL count_only = FALSE;
|
||||
static BOOL do_colour = FALSE;
|
||||
@@ -391,6 +393,7 @@ used to identify them. */
|
||||
#define N_INCLUDE_FROM (-21)
|
||||
#define N_OM_SEPARATOR (-22)
|
||||
#define N_MAX_BUFSIZE (-23)
|
||||
+#define N_OM_CAPTURE (-24)
|
||||
|
||||
static option_item optionlist[] = {
|
||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||
@@ -437,6 +440,7 @@ static option_item optionlist[] = {
|
||||
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
|
||||
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
|
||||
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
|
||||
+ { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
|
||||
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
|
||||
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
|
||||
{ OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
|
||||
@@ -2568,7 +2572,7 @@ while (ptr < endptr)
|
||||
|
||||
for (i = 0; i < jfriedl_XR; i++)
|
||||
match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
|
||||
- PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
|
||||
+ PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
|
||||
|
||||
if (gettimeofday(&end_time, &dummy) != 0)
|
||||
perror("bad gettimeofday");
|
||||
@@ -2688,7 +2692,7 @@ while (ptr < endptr)
|
||||
for (om = only_matching; om != NULL; om = om->next)
|
||||
{
|
||||
int n = om->groupnum;
|
||||
- if (n < mrc)
|
||||
+ if (n == 0 || n < mrc)
|
||||
{
|
||||
int plen = offsets[2*n + 1] - offsets[2*n];
|
||||
if (plen > 0)
|
||||
@@ -3639,6 +3643,7 @@ int rc = 1;
|
||||
BOOL only_one_at_top;
|
||||
patstr *cp;
|
||||
fnstr *fn;
|
||||
+omstr *om;
|
||||
const char *locale_from = "--locale";
|
||||
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
@@ -3655,20 +3660,6 @@ must use STDOUT_NL to terminate lines. */
|
||||
_setmode(_fileno(stdout), _O_BINARY);
|
||||
#endif
|
||||
|
||||
-/* Set up a default compile and match contexts and a match data block. */
|
||||
-
|
||||
-compile_context = pcre2_compile_context_create(NULL);
|
||||
-match_context = pcre2_match_context_create(NULL);
|
||||
-match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
|
||||
-offsets = pcre2_get_ovector_pointer(match_data);
|
||||
-
|
||||
-/* If string (script) callouts are supported, set up the callout processing
|
||||
-function. */
|
||||
-
|
||||
-#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
-pcre2_set_callout(match_context, pcre2grep_callout, NULL);
|
||||
-#endif
|
||||
-
|
||||
/* Process the options */
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
@@ -4015,12 +4006,40 @@ if (only_matching_count > 1)
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
+/* Check that there is a big enough ovector for all -o settings. */
|
||||
+
|
||||
+for (om = only_matching; om != NULL; om = om->next)
|
||||
+ {
|
||||
+ int n = om->groupnum;
|
||||
+ if (n > (int)capture_max)
|
||||
+ {
|
||||
+ fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
|
||||
+ fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
|
||||
+ goto EXIT2;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Check the text supplied to --output for errors. */
|
||||
|
||||
if (output_text != NULL &&
|
||||
!syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
|
||||
goto EXIT2;
|
||||
|
||||
+/* Set up default compile and match contexts and a match data block. */
|
||||
+
|
||||
+offset_size = capture_max + 1;
|
||||
+compile_context = pcre2_compile_context_create(NULL);
|
||||
+match_context = pcre2_match_context_create(NULL);
|
||||
+match_data = pcre2_match_data_create(offset_size, NULL);
|
||||
+offsets = pcre2_get_ovector_pointer(match_data);
|
||||
+
|
||||
+/* If string (script) callouts are supported, set up the callout processing
|
||||
+function. */
|
||||
+
|
||||
+#ifdef SUPPORT_PCRE2GREP_CALLOUT
|
||||
+pcre2_set_callout(match_context, pcre2grep_callout, NULL);
|
||||
+#endif
|
||||
+
|
||||
/* Put limits into the match data block. */
|
||||
|
||||
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
|
||||
diff --git a/testdata/grepoutput b/testdata/grepoutput
|
||||
index 2bd69be..a9297e1 100644
|
||||
--- a/testdata/grepoutput
|
||||
+++ b/testdata/grepoutput
|
||||
@@ -949,3 +949,10 @@ RC=0
|
||||
---------------------------- Test 126 -----------------------------
|
||||
ABC |