10.34-RC1 bump
This commit is contained in:
		
							parent
							
								
									96360c9835
								
							
						
					
					
						commit
						db0a3cc7dd
					
				
							
								
								
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -16,3 +16,5 @@ | |||||||
| /pcre2-10.33-RC1.tar.bz2 | /pcre2-10.33-RC1.tar.bz2 | ||||||
| /pcre2-10.33.tar.bz2 | /pcre2-10.33.tar.bz2 | ||||||
| /pcre2-10.33.tar.bz2.sig | /pcre2-10.33.tar.bz2.sig | ||||||
|  | /pcre2-10.34-RC1.tar.bz2 | ||||||
|  | /pcre2-10.34-RC1.tar.bz2.sig | ||||||
|  | |||||||
| @ -1,39 +0,0 @@ | |||||||
| From bc7fb8964ca3a422f472189b0eff751c1cc377b0 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Mon, 9 Sep 2019 07:12:00 +0000 |  | ||||||
| Subject: [PATCH] Add underflow check in JIT. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1167 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_jit_compile.c | 8 ++++++-- |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
 |  | ||||||
| index 8cbd8f9..79a27fd 100644
 |  | ||||||
| --- a/src/pcre2_jit_compile.c
 |  | ||||||
| +++ b/src/pcre2_jit_compile.c
 |  | ||||||
| @@ -5793,12 +5793,16 @@ if (common->match_end_ptr != 0)
 |  | ||||||
|    { |  | ||||||
|    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |  | ||||||
|    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |  | ||||||
| -  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
 |  | ||||||
| +  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
 |  | ||||||
| +  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
 |  | ||||||
|    OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); |  | ||||||
|    CMOV(SLJIT_GREATER, STR_END, TMP1, 0); |  | ||||||
|    } |  | ||||||
|  else |  | ||||||
| -  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
 |  | ||||||
| +  {
 |  | ||||||
| +  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
 |  | ||||||
| +  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
 |  | ||||||
| +  }
 |  | ||||||
|   |  | ||||||
|  SLJIT_ASSERT(range_right >= 0); |  | ||||||
|   |  | ||||||
| -- 
 |  | ||||||
| 2.21.0 |  | ||||||
| 
 |  | ||||||
| @ -1,54 +0,0 @@ | |||||||
| From bcf39c1828399ebc33fb92c4edaf2bdd5f891a58 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Fri, 5 Jul 2019 15:49:37 +0000 |  | ||||||
| Subject: [PATCH] Additional overflow test. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1127 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  testdata/testinput2  | 4 ++++ |  | ||||||
|  testdata/testoutput2 | 5 +++++ |  | ||||||
|  2 files changed, 9 insertions(+) |  | ||||||
| 
 |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 079d6d8..9412bf6 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5591,6 +5591,10 @@ a)"xI
 |  | ||||||
|   |  | ||||||
|  /\[()]{65535}(?<A>)/expand |  | ||||||
|   |  | ||||||
| +# Addition overflow
 |  | ||||||
|  /( {32742} {42})(?<!\1{65481})/ |  | ||||||
|   |  | ||||||
| +# Multiplication overflow
 |  | ||||||
| +/(X{65535})(?<=\1{32770})/
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index bfe61a3..950095f 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16940,9 +16940,14 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
 |  | ||||||
|  /\[()]{65535}(?<A>)/expand |  | ||||||
|  Failed: error 197 at offset 131075: too many capturing groups (maximum 65535) |  | ||||||
|   |  | ||||||
| +# Addition overflow
 |  | ||||||
|  /( {32742} {42})(?<!\1{65481})/ |  | ||||||
|  Failed: error 187 at offset 15: lookbehind assertion is too long |  | ||||||
|   |  | ||||||
| +# Multiplication overflow
 |  | ||||||
| +/(X{65535})(?<=\1{32770})/
 |  | ||||||
| +Failed: error 187 at offset 10: lookbehind assertion is too long
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,108 +0,0 @@ | |||||||
| From cdefe642dc2e6b5b8e6703773934813f317bc488 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Thu, 4 Jul 2019 17:01:53 +0000 |  | ||||||
| Subject: [PATCH] Check for integer overflow when computing lookbehind lengths. |  | ||||||
|  Fixes Clusterfuzz issue 13656. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1126 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_compile.c  | 38 ++++++++++++++++++++++++++++---------- |  | ||||||
|  testdata/testinput2  |  2 ++ |  | ||||||
|  testdata/testoutput2 |  3 +++ |  | ||||||
|  3 files changed, 33 insertions(+), 10 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
 |  | ||||||
| index c82c6ca..f6e0a0b 100644
 |  | ||||||
| --- a/src/pcre2_compile.c
 |  | ||||||
| +++ b/src/pcre2_compile.c
 |  | ||||||
| @@ -9197,8 +9197,26 @@ for (;; pptr++)
 |  | ||||||
|      case META_MINMAX_QUERY: |  | ||||||
|      if (pptr[1] == pptr[2]) |  | ||||||
|        { |  | ||||||
| -      if (pptr[1] == 0) branchlength -= lastitemlength;
 |  | ||||||
| -        else itemlength = (pptr[1] - 1) * lastitemlength;
 |  | ||||||
| +      switch(pptr[1])
 |  | ||||||
| +        {
 |  | ||||||
| +        case 0:
 |  | ||||||
| +        branchlength -= lastitemlength;
 |  | ||||||
| +        break;
 |  | ||||||
| +
 |  | ||||||
| +        case 1:
 |  | ||||||
| +        itemlength = 0;
 |  | ||||||
| +        break;
 |  | ||||||
| +
 |  | ||||||
| +        default:  /* Check for integer overflow */
 |  | ||||||
| +        if (lastitemlength != 0 &&  /* Should not occur, but just in case */
 |  | ||||||
| +            INT_MAX/lastitemlength < pptr[1] - 1)
 |  | ||||||
| +          {
 |  | ||||||
| +          *errcodeptr = ERR87;  /* Integer overflow; lookbehind too big */
 |  | ||||||
| +          return -1;
 |  | ||||||
| +          }
 |  | ||||||
| +        itemlength = (pptr[1] - 1) * lastitemlength;
 |  | ||||||
| +        break;
 |  | ||||||
| +        }
 |  | ||||||
|        pptr += 2; |  | ||||||
|        break; |  | ||||||
|        } |  | ||||||
| @@ -9212,19 +9230,19 @@ for (;; pptr++)
 |  | ||||||
|      return -1; |  | ||||||
|      } |  | ||||||
|   |  | ||||||
| -  /* Add the item length to the branchlength, and save it for use if the next
 |  | ||||||
| -  thing is a quantifier. */
 |  | ||||||
| -
 |  | ||||||
| -  branchlength += itemlength;
 |  | ||||||
| -  lastitemlength = itemlength;
 |  | ||||||
| -
 |  | ||||||
| -  /* Ensure that the length does not overflow the limit. */
 |  | ||||||
| +  /* Add the item length to the branchlength, checking for integer overflow and
 |  | ||||||
| +  for the branch length exceeding the limit. */
 |  | ||||||
|   |  | ||||||
| -  if (branchlength > LOOKBEHIND_MAX)
 |  | ||||||
| +  if (INT_MAX - branchlength < (int)itemlength ||
 |  | ||||||
| +      (branchlength += itemlength) > LOOKBEHIND_MAX)
 |  | ||||||
|      { |  | ||||||
|      *errcodeptr = ERR87; |  | ||||||
|      return -1; |  | ||||||
|      } |  | ||||||
| +
 |  | ||||||
| +  /* Save this item length for use if the next item is a quantifier. */
 |  | ||||||
| +
 |  | ||||||
| +  lastitemlength = itemlength;
 |  | ||||||
|    } |  | ||||||
|   |  | ||||||
|  EXIT: |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 8a98f94..079d6d8 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5591,4 +5591,6 @@ a)"xI
 |  | ||||||
|   |  | ||||||
|  /\[()]{65535}(?<A>)/expand |  | ||||||
|   |  | ||||||
| +/( {32742} {42})(?<!\1{65481})/
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index 158fbad..bfe61a3 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16940,6 +16940,9 @@ Failed: error 197 at offset 131071: too many capturing groups (maximum 65535)
 |  | ||||||
|  /\[()]{65535}(?<A>)/expand |  | ||||||
|  Failed: error 197 at offset 131075: too many capturing groups (maximum 65535) |  | ||||||
|   |  | ||||||
| +/( {32742} {42})(?<!\1{65481})/
 |  | ||||||
| +Failed: error 187 at offset 15: lookbehind assertion is too long
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,160 +0,0 @@ | |||||||
| From 76d59bdbc2d30bad1d11e0490b767058dc33d39c Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Wed, 19 Jun 2019 16:27:50 +0000 |  | ||||||
| Subject: [PATCH] Don't ignore {1}+ when it is applied to a parenthesized item. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1111 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_compile.c  | 29 +++++++++++++++++++---------- |  | ||||||
|  testdata/testinput1  | 14 ++++++++++++++ |  | ||||||
|  testdata/testoutput1 | 18 ++++++++++++++++++ |  | ||||||
|  3 files changed, 51 insertions(+), 10 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
 |  | ||||||
| index cd6fbea..c82c6ca 100644
 |  | ||||||
| --- a/src/pcre2_compile.c
 |  | ||||||
| +++ b/src/pcre2_compile.c
 |  | ||||||
| @@ -6723,10 +6723,6 @@ for (;; pptr++)
 |  | ||||||
|      reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; |  | ||||||
|      op_type = 0; |  | ||||||
|   |  | ||||||
| -    /* If the repeat is {1} we can ignore it. */
 |  | ||||||
| -
 |  | ||||||
| -    if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
 |  | ||||||
| -
 |  | ||||||
|      /* Adjust first and required code units for a zero repeat. */ |  | ||||||
|   |  | ||||||
|      if (repeat_min == 0) |  | ||||||
| @@ -6769,7 +6765,10 @@ for (;; pptr++)
 |  | ||||||
|      tempcode = previous; |  | ||||||
|      op_previous = *previous; |  | ||||||
|   |  | ||||||
| -    /* Now handle repetition for the different types of item. */
 |  | ||||||
| +    /* Now handle repetition for the different types of item. If the repeat
 |  | ||||||
| +    minimum and the repeat maximum are both 1, we can ignore the quantifier for
 |  | ||||||
| +    non-parenthesized items, as they have only one alternative. For anything in
 |  | ||||||
| +    parentheses, we must not ignore if {1} is possessive. */
 |  | ||||||
|   |  | ||||||
|      switch (op_previous) |  | ||||||
|        { |  | ||||||
| @@ -6783,6 +6782,7 @@ for (;; pptr++)
 |  | ||||||
|        case OP_CHARI: |  | ||||||
|        case OP_NOT: |  | ||||||
|        case OP_NOTI: |  | ||||||
| +      if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
 |  | ||||||
|        op_type = chartypeoffset[op_previous - OP_CHAR]; |  | ||||||
|   |  | ||||||
|        /* Deal with UTF characters that take up more than one code unit. */ |  | ||||||
| @@ -6829,6 +6829,7 @@ for (;; pptr++)
 |  | ||||||
|          code = previous; |  | ||||||
|          goto END_REPEAT; |  | ||||||
|          } |  | ||||||
| +      if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
 |  | ||||||
|   |  | ||||||
|        if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED) |  | ||||||
|          *code++ = OP_CRSTAR + repeat_type; |  | ||||||
| @@ -6863,6 +6864,8 @@ for (;; pptr++)
 |  | ||||||
|        repetition. */ |  | ||||||
|   |  | ||||||
|        case OP_RECURSE: |  | ||||||
| +      if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
 |  | ||||||
| +        goto END_REPEAT;
 |  | ||||||
|   |  | ||||||
|        /* Generate unwrapped repeats for a non-zero minimum, except when the |  | ||||||
|        minimum is 1 and the maximum unlimited, because that can be handled with |  | ||||||
| @@ -6945,6 +6948,9 @@ for (;; pptr++)
 |  | ||||||
|          PCRE2_UCHAR *bralink = NULL; |  | ||||||
|          PCRE2_UCHAR *brazeroptr = NULL; |  | ||||||
|   |  | ||||||
| +        if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
 |  | ||||||
| +          goto END_REPEAT;
 |  | ||||||
| +
 |  | ||||||
|          /* Repeating a DEFINE group (or any group where the condition is always |  | ||||||
|          FALSE and there is only one branch) is pointless, but Perl allows the |  | ||||||
|          syntax, so we just ignore the repeat. */ |  | ||||||
| @@ -7161,11 +7167,12 @@ for (;; pptr++)
 |  | ||||||
|          and SCRIPT_RUN groups at runtime, but in a different way.] |  | ||||||
|   |  | ||||||
|          Then, if the quantifier was possessive and the bracket is not a |  | ||||||
| -        conditional, we convert the BRA code to the POS form, and the KET code to
 |  | ||||||
| -        KETRPOS. (It turns out to be convenient at runtime to detect this kind of
 |  | ||||||
| -        subpattern at both the start and at the end.) The use of special opcodes
 |  | ||||||
| -        makes it possible to reduce greatly the stack usage in pcre2_match(). If
 |  | ||||||
| -        the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
 |  | ||||||
| +        conditional, we convert the BRA code to the POS form, and the KET code
 |  | ||||||
| +        to KETRPOS. (It turns out to be convenient at runtime to detect this
 |  | ||||||
| +        kind of subpattern at both the start and at the end.) The use of
 |  | ||||||
| +        special opcodes makes it possible to reduce greatly the stack usage in
 |  | ||||||
| +        pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to
 |  | ||||||
| +        OP_BRAPOSZERO.
 |  | ||||||
|   |  | ||||||
|          Then, if the minimum number of matches is 1 or 0, cancel the possessive |  | ||||||
|          flag so that the default action below, of wrapping everything inside |  | ||||||
| @@ -7266,6 +7273,8 @@ for (;; pptr++)
 |  | ||||||
|          int prop_type, prop_value; |  | ||||||
|          PCRE2_UCHAR *oldcode; |  | ||||||
|   |  | ||||||
| +        if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
 |  | ||||||
| +
 |  | ||||||
|          op_type = OP_TYPESTAR - OP_STAR;      /* Use type opcodes */ |  | ||||||
|          mclength = 0;                         /* Not a character */ |  | ||||||
|   |  | ||||||
| diff --git a/testdata/testinput1 b/testdata/testinput1
 |  | ||||||
| index 7b6918a..4d9ec5a 100644
 |  | ||||||
| --- a/testdata/testinput1
 |  | ||||||
| +++ b/testdata/testinput1
 |  | ||||||
| @@ -6351,4 +6351,18 @@ ef) x/x,mark
 |  | ||||||
|      acb |  | ||||||
|      abc      |  | ||||||
|   |  | ||||||
| +/(?:a|ab){1}+c/
 |  | ||||||
| +\= Expect no match
 |  | ||||||
| +    abc
 |  | ||||||
| +
 |  | ||||||
| +/(a|ab){1}+c/
 |  | ||||||
| +    abc
 |  | ||||||
| +    
 |  | ||||||
| +/(a+){1}+a/
 |  | ||||||
| +\= Expect no match
 |  | ||||||
| +    aaaa
 |  | ||||||
| +
 |  | ||||||
| +/(?(DEFINE)(a|ab))(?1){1}+c/
 |  | ||||||
| +    abc    
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| diff --git a/testdata/testoutput1 b/testdata/testoutput1
 |  | ||||||
| index d9f8c3b..fffb8ec 100644
 |  | ||||||
| --- a/testdata/testoutput1
 |  | ||||||
| +++ b/testdata/testoutput1
 |  | ||||||
| @@ -10063,4 +10063,22 @@ MK: 2
 |  | ||||||
|   0: a |  | ||||||
|  MK: 2 |  | ||||||
|   |  | ||||||
| +/(?:a|ab){1}+c/
 |  | ||||||
| +\= Expect no match
 |  | ||||||
| +    abc
 |  | ||||||
| +No match
 |  | ||||||
| +
 |  | ||||||
| +/(a|ab){1}+c/
 |  | ||||||
| +    abc
 |  | ||||||
| +No match
 |  | ||||||
| +    
 |  | ||||||
| +/(a+){1}+a/
 |  | ||||||
| +\= Expect no match
 |  | ||||||
| +    aaaa
 |  | ||||||
| +No match
 |  | ||||||
| +
 |  | ||||||
| +/(?(DEFINE)(a|ab))(?1){1}+c/
 |  | ||||||
| +    abc    
 |  | ||||||
| +No match
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,37 +0,0 @@ | |||||||
| From 9835bbc7fbb3423163dc49e7d822dad2b135e192 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Tue, 15 Oct 2019 10:46:36 +0000 |  | ||||||
| Subject: [PATCH] Ensure regexec is thread safe to avoid sanitizer warnings. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1176 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| ---
 |  | ||||||
|  src/pcre2posix.c | 3 +-- |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2posix.c b/src/pcre2posix.c
 |  | ||||||
| index 34a8d80..b24620a 100644
 |  | ||||||
| --- a/src/pcre2posix.c
 |  | ||||||
| +++ b/src/pcre2posix.c
 |  | ||||||
| @@ -323,6 +323,7 @@ if (preg->re_pcre2_code == NULL)
 |  | ||||||
|    PCRE2_INFO_CAPTURECOUNT, &re_nsub); |  | ||||||
|  preg->re_nsub = (size_t)re_nsub; |  | ||||||
|  preg->re_match_data = pcre2_match_data_create(re_nsub + 1, NULL); |  | ||||||
| +preg->re_erroffset = (size_t)(-1);  /* No meaning after successful compile */
 |  | ||||||
|   |  | ||||||
|  if (preg->re_match_data == NULL) |  | ||||||
|    { |  | ||||||
| @@ -356,8 +357,6 @@ if ((eflags & REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
 |  | ||||||
|  if ((eflags & REG_NOTEOL) != 0) options |= PCRE2_NOTEOL; |  | ||||||
|  if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE2_NOTEMPTY; |  | ||||||
|   |  | ||||||
| -((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
 |  | ||||||
| -
 |  | ||||||
|  /* When REG_NOSUB was specified, or if no vector has been passed in which to |  | ||||||
|  put captured strings, ensure that nmatch is zero. This will stop any attempt to |  | ||||||
|  write to pmatch. */ |  | ||||||
| -- 
 |  | ||||||
| 2.21.0 |  | ||||||
| 
 |  | ||||||
| @ -1,89 +0,0 @@ | |||||||
| From ec098f6b898334be0674dbadc9fd67a0532fa0eb Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Sat, 10 Aug 2019 11:34:50 +0000 |  | ||||||
| Subject: [PATCH] Fix allusedtext bug, rightmost consulted character incorrect |  | ||||||
|  in negative lookaheads. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1157 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_match.c     | 1 + |  | ||||||
|  testdata/testinput15  | 4 ++++ |  | ||||||
|  testdata/testinput2   | 2 +- |  | ||||||
|  testdata/testoutput15 | 8 ++++++++ |  | ||||||
|  testdata/testoutput2  | 2 +- |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 |  | ||||||
| index 26fc01e..4471183 100644
 |  | ||||||
| --- a/src/pcre2_match.c
 |  | ||||||
| +++ b/src/pcre2_match.c
 |  | ||||||
| @@ -5971,6 +5971,7 @@ in rrc. */
 |  | ||||||
|  #define LBL(val) case val: goto L_RM##val; |  | ||||||
|   |  | ||||||
|  RETURN_SWITCH: |  | ||||||
| +if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
 |  | ||||||
|  if (Frdepth == 0) return rrc;                     /* Exit from the top level */ |  | ||||||
|  F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */ |  | ||||||
|  mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ |  | ||||||
| diff --git a/testdata/testinput15 b/testdata/testinput15
 |  | ||||||
| index 2cb712d..5dd6897 100644
 |  | ||||||
| --- a/testdata/testinput15
 |  | ||||||
| +++ b/testdata/testinput15
 |  | ||||||
| @@ -231,4 +231,8 @@
 |  | ||||||
|  /(*LIMIT_HEAP=21)\[(a)]{60}/expand |  | ||||||
|      \[a]{60} |  | ||||||
|   |  | ||||||
| +/b(?<!ax)(?!cx)/allusedtext
 |  | ||||||
| +    abc
 |  | ||||||
| +    abcz
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput15 |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 4377f80..7b44fb0 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -4584,7 +4584,7 @@ B)x/alt_verbnames,mark
 |  | ||||||
|   |  | ||||||
|  /abcd/null_context |  | ||||||
|      abcd\=null_context |  | ||||||
| -\= Expect error
 |  | ||||||
| +\= Expect error - not allowed together
 |  | ||||||
|      abcd\=null_context,find_limits |  | ||||||
|      abcd\=allusedtext,startchar |  | ||||||
|   |  | ||||||
| diff --git a/testdata/testoutput15 b/testdata/testoutput15
 |  | ||||||
| index c51cda7..d854412 100644
 |  | ||||||
| --- a/testdata/testoutput15
 |  | ||||||
| +++ b/testdata/testoutput15
 |  | ||||||
| @@ -525,4 +525,12 @@ No match
 |  | ||||||
|      \[a]{60} |  | ||||||
|  Failed: error -63: heap limit exceeded |  | ||||||
|   |  | ||||||
| +/b(?<!ax)(?!cx)/allusedtext
 |  | ||||||
| +    abc
 |  | ||||||
| + 0: abc
 |  | ||||||
| +    < >
 |  | ||||||
| +    abcz
 |  | ||||||
| + 0: abcz
 |  | ||||||
| +    < >>
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput15 |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index 0fd2187..0803d9e 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -14803,7 +14803,7 @@ No match
 |  | ||||||
|  /abcd/null_context |  | ||||||
|      abcd\=null_context |  | ||||||
|   0: abcd |  | ||||||
| -\= Expect error
 |  | ||||||
| +\= Expect error - not allowed together
 |  | ||||||
|      abcd\=null_context,find_limits |  | ||||||
|  ** Not allowed together: find_limits null_context |  | ||||||
|      abcd\=allusedtext,startchar |  | ||||||
| -- 
 |  | ||||||
| 2.21.0 |  | ||||||
| 
 |  | ||||||
| @ -1,114 +0,0 @@ | |||||||
| From 007b635b6788f8317747842b02f9c85137277c20 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Thu, 18 Jul 2019 17:20:29 +0000 |  | ||||||
| Subject: [PATCH] Fix bug in recent patch for lookbehinds within lookaheads. |  | ||||||
|  Fixes ClusterFuzz 15933. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1138 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_compile.c  | 22 +++++++++++++--------- |  | ||||||
|  testdata/testinput2  |  3 +++ |  | ||||||
|  testdata/testoutput2 |  4 ++++ |  | ||||||
|  3 files changed, 20 insertions(+), 9 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
 |  | ||||||
| index 2ae95ed..b68c154 100644
 |  | ||||||
| --- a/src/pcre2_compile.c
 |  | ||||||
| +++ b/src/pcre2_compile.c
 |  | ||||||
| @@ -136,7 +136,8 @@ static BOOL
 |  | ||||||
|      compile_block *); |  | ||||||
|   |  | ||||||
|  static int |  | ||||||
| -  check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
 |  | ||||||
| +  check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
 |  | ||||||
| +    compile_block *);
 |  | ||||||
|   |  | ||||||
|   |  | ||||||
|  /************************************************* |  | ||||||
| @@ -9004,7 +9005,7 @@ for (;; pptr++)
 |  | ||||||
|   |  | ||||||
|      case META_LOOKAHEAD: |  | ||||||
|      case META_LOOKAHEADNOT: |  | ||||||
| -    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
 |  | ||||||
| +    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
 |  | ||||||
|      if (*errcodeptr != 0) return -1; |  | ||||||
|   |  | ||||||
|      /* Ignore any qualifiers that follow a lookahead assertion. */ |  | ||||||
| @@ -9326,15 +9327,17 @@ order to process any lookbehinds that they may contain. It stops when it hits a
 |  | ||||||
|  non-nested closing parenthesis in this case, returning a pointer to it. |  | ||||||
|   |  | ||||||
|  Arguments |  | ||||||
| -  pptr    points to where to start (start of pattern or start of lookahead)
 |  | ||||||
| -  retptr  if not NULL, return the ket pointer here
 |  | ||||||
| -  cb      points to the compile block
 |  | ||||||
| +  pptr      points to where to start (start of pattern or start of lookahead)
 |  | ||||||
| +  retptr    if not NULL, return the ket pointer here
 |  | ||||||
| +  recurses  chain of recurse_check to catch mutual recursion 
 |  | ||||||
| +  cb        points to the compile block
 |  | ||||||
|   |  | ||||||
| -Returns:  0 on success, or an errorcode (cb->erroroffset will be set)
 |  | ||||||
| +Returns:    0 on success, or an errorcode (cb->erroroffset will be set)
 |  | ||||||
|  */ |  | ||||||
|   |  | ||||||
|  static int |  | ||||||
| -check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
 |  | ||||||
| +check_lookbehinds(uint32_t *pptr, uint32_t **retptr, 
 |  | ||||||
| +  parsed_recurse_check *recurses, compile_block *cb)
 |  | ||||||
|  { |  | ||||||
|  int errorcode = 0; |  | ||||||
|  int loopcount = 0; |  | ||||||
| @@ -9449,7 +9452,8 @@ for (; *pptr != META_END; pptr++)
 |  | ||||||
|   |  | ||||||
|      case META_LOOKBEHIND: |  | ||||||
|      case META_LOOKBEHINDNOT: |  | ||||||
| -    if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, NULL, cb))
 |  | ||||||
| +    if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount,
 |  | ||||||
| +         recurses, cb))
 |  | ||||||
|        return errorcode; |  | ||||||
|      break; |  | ||||||
|      } |  | ||||||
| @@ -9899,7 +9903,7 @@ lengths. */
 |  | ||||||
|   |  | ||||||
|  if (has_lookbehind) |  | ||||||
|    { |  | ||||||
| -  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
 |  | ||||||
| +  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
 |  | ||||||
|    if (errorcode != 0) goto HAD_CB_ERROR; |  | ||||||
|    } |  | ||||||
|   |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index d85fc5f..1bfe591 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5600,4 +5600,7 @@ a)"xI
 |  | ||||||
|  /(?<=(?=.(?<=x)))/ |  | ||||||
|      ab\=ph |  | ||||||
|   |  | ||||||
| +# Expect error (recursion => not fixed length)
 |  | ||||||
| +/(\2)((?=(?<=\1)))/
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index 6405e26..758b4db 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16952,6 +16952,10 @@ Failed: error 187 at offset 10: lookbehind assertion is too long
 |  | ||||||
|      ab\=ph |  | ||||||
|  No match |  | ||||||
|   |  | ||||||
| +# Expect error (recursion => not fixed length)
 |  | ||||||
| +/(\2)((?=(?<=\1)))/
 |  | ||||||
| +Failed: error 125 at offset 8: lookbehind assertion is not fixed length
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,210 +0,0 @@ | |||||||
| From 8a5ce4c055808fd3a19b0da15e0e3caeb0ac3abb Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Mon, 26 Aug 2019 16:28:26 +0000 |  | ||||||
| Subject: [PATCH] Fix bug introduced in commit 1133. Lookbehinds that follow a |  | ||||||
|  condition were not always properly handled. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1160 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_compile.c  | 12 +++++- |  | ||||||
|  testdata/testinput1  |  3 ++ |  | ||||||
|  testdata/testinput2  | 10 +++++ |  | ||||||
|  testdata/testoutput1 |  6 +++ |  | ||||||
|  testdata/testoutput2 | 93 ++++++++++++++++++++++++++++++++++++++++++++ |  | ||||||
|  5 files changed, 122 insertions(+), 2 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
 |  | ||||||
| index b68c154..5cae730 100644
 |  | ||||||
| --- a/src/pcre2_compile.c
 |  | ||||||
| +++ b/src/pcre2_compile.c
 |  | ||||||
| @@ -9411,13 +9411,22 @@ for (; *pptr != META_END; pptr++)
 |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
|      case META_BACKREF_BYNAME: |  | ||||||
| +    case META_RECURSE_BYNAME:
 |  | ||||||
| +    pptr += 1 + SIZEOFFSET;
 |  | ||||||
| +    break;
 |  | ||||||
| +
 |  | ||||||
|      case META_COND_DEFINE: |  | ||||||
|      case META_COND_NAME: |  | ||||||
|      case META_COND_NUMBER: |  | ||||||
|      case META_COND_RNAME: |  | ||||||
|      case META_COND_RNUMBER: |  | ||||||
| -    case META_RECURSE_BYNAME:
 |  | ||||||
|      pptr += 1 + SIZEOFFSET; |  | ||||||
| +    nestlevel++;
 |  | ||||||
| +    break;
 |  | ||||||
| +
 |  | ||||||
| +    case META_COND_VERSION:
 |  | ||||||
| +    pptr += 3;
 |  | ||||||
| +    nestlevel++;
 |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
|      case META_CALLOUT_STRING: |  | ||||||
| @@ -9438,7 +9447,6 @@ for (; *pptr != META_END; pptr++)
 |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
|      case META_CALLOUT_NUMBER: |  | ||||||
| -    case META_COND_VERSION:
 |  | ||||||
|      pptr += 3; |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
| diff --git a/testdata/testinput1 b/testdata/testinput1
 |  | ||||||
| index ee9354b..db1706b 100644
 |  | ||||||
| --- a/testdata/testinput1
 |  | ||||||
| +++ b/testdata/testinput1
 |  | ||||||
| @@ -6371,4 +6371,7 @@ ef) x/x,mark
 |  | ||||||
|  /(?<=(?=(?<=a)))b/ |  | ||||||
|      ab |  | ||||||
|   |  | ||||||
| +/^(?<A>a)(?(<A>)b)((?<=b).*)$/
 |  | ||||||
| +    abc
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 2b1aced..a5f59b9 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5611,4 +5611,14 @@ a)"xI
 |  | ||||||
|  /\A\s*((?:[^`]{28500}){4}|a)/I |  | ||||||
|      a |  | ||||||
|   |  | ||||||
| +/(?<A>a)(?(<A>)b)((?<=b).*)/B
 |  | ||||||
| +
 |  | ||||||
| +/(?(1)b)((?<=b).*)/B
 |  | ||||||
| +
 |  | ||||||
| +/(?(R1)b)((?<=b).*)/B
 |  | ||||||
| +
 |  | ||||||
| +/(?(DEFINE)b)((?<=b).*)/B
 |  | ||||||
| +
 |  | ||||||
| +/(?(VERSION=10.4)b)((?<=b).*)/B
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput1 b/testdata/testoutput1
 |  | ||||||
| index c9bfea8..3f872e7 100644
 |  | ||||||
| --- a/testdata/testoutput1
 |  | ||||||
| +++ b/testdata/testoutput1
 |  | ||||||
| @@ -10090,4 +10090,10 @@ No match
 |  | ||||||
|      ab |  | ||||||
|   0: b |  | ||||||
|   |  | ||||||
| +/^(?<A>a)(?(<A>)b)((?<=b).*)$/
 |  | ||||||
| +    abc
 |  | ||||||
| + 0: abc
 |  | ||||||
| + 1: a
 |  | ||||||
| + 2: c
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index d2415c3..6e41ed8 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16983,6 +16983,99 @@ Subject length lower bound = 1
 |  | ||||||
|   0: a |  | ||||||
|   1: a |  | ||||||
|   |  | ||||||
| +/(?<A>a)(?(<A>)b)((?<=b).*)/B
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +        Bra
 |  | ||||||
| +        CBra 1
 |  | ||||||
| +        a
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Cond
 |  | ||||||
| +      1 Cond ref
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        CBra 2
 |  | ||||||
| +        AssertB
 |  | ||||||
| +        Reverse
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Any*+
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Ket
 |  | ||||||
| +        End
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +
 |  | ||||||
| +/(?(1)b)((?<=b).*)/B
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +        Bra
 |  | ||||||
| +        Cond
 |  | ||||||
| +      1 Cond ref
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        CBra 1
 |  | ||||||
| +        AssertB
 |  | ||||||
| +        Reverse
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Any*+
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Ket
 |  | ||||||
| +        End
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +
 |  | ||||||
| +/(?(R1)b)((?<=b).*)/B
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +        Bra
 |  | ||||||
| +        Cond
 |  | ||||||
| +        Cond recurse 1
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        CBra 1
 |  | ||||||
| +        AssertB
 |  | ||||||
| +        Reverse
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Any*+
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Ket
 |  | ||||||
| +        End
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +
 |  | ||||||
| +/(?(DEFINE)b)((?<=b).*)/B
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +        Bra
 |  | ||||||
| +        Cond
 |  | ||||||
| +        Cond false
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        CBra 1
 |  | ||||||
| +        AssertB
 |  | ||||||
| +        Reverse
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Any*+
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Ket
 |  | ||||||
| +        End
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +
 |  | ||||||
| +/(?(VERSION=10.4)b)((?<=b).*)/B
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +        Bra
 |  | ||||||
| +        Cond
 |  | ||||||
| +        Cond false
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        CBra 1
 |  | ||||||
| +        AssertB
 |  | ||||||
| +        Reverse
 |  | ||||||
| +        b
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Any*+
 |  | ||||||
| +        Ket
 |  | ||||||
| +        Ket
 |  | ||||||
| +        End
 |  | ||||||
| +------------------------------------------------------------------
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.21.0 |  | ||||||
| 
 |  | ||||||
| @ -1,42 +0,0 @@ | |||||||
| From 6809752eacde104d45c5e11c3c64165857200ce7 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Mon, 13 May 2019 16:26:17 +0000 |  | ||||||
| Subject: [PATCH 1/2] Fix crash when \X is used without UTF in JIT. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1091 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| ---
 |  | ||||||
|  testdata/testinput4  | 3 +++ |  | ||||||
|  testdata/testoutput4 | 4 ++++ |  | ||||||
| 
 |  | ||||||
| diff --git a/testdata/testinput4 b/testdata/testinput4
 |  | ||||||
| index cccab0e..f3d498c 100644
 |  | ||||||
| --- a/testdata/testinput4
 |  | ||||||
| +++ b/testdata/testinput4
 |  | ||||||
| @@ -2480,4 +2480,7 @@
 |  | ||||||
|  /^(?'אABC'...)(?&אABC)/utf |  | ||||||
|      123123123456 |  | ||||||
|   |  | ||||||
| +/\X*/
 |  | ||||||
| +    \xF3aaa\xE4\xEA\xEB\xFEa
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput4 |  | ||||||
| diff --git a/testdata/testoutput4 b/testdata/testoutput4
 |  | ||||||
| index 84b8b9e..53926ed 100644
 |  | ||||||
| --- a/testdata/testoutput4
 |  | ||||||
| +++ b/testdata/testoutput4
 |  | ||||||
| @@ -4012,4 +4012,8 @@ No match
 |  | ||||||
|   0: 123123 |  | ||||||
|   1: 123 |  | ||||||
|   |  | ||||||
| +/\X*/
 |  | ||||||
| +    \xF3aaa\xE4\xEA\xEB\xFEa
 |  | ||||||
| + 0: \xf3aaa\xe4\xea\xeb\xfea
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput4 |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,70 +0,0 @@ | |||||||
| From 00acf0e2f3f01a3057fc099c60e4f530d744619b Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Wed, 16 Oct 2019 17:12:13 +0000 |  | ||||||
| Subject: [PATCH] Fix error offset bug introduced at 1176. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_match.c     |  4 ++++ |  | ||||||
|  testdata/testinput10  |  6 ++++++ |  | ||||||
|  testdata/testoutput10 | 10 ++++++++++ |  | ||||||
|  3 files changed, 20 insertions(+) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 |  | ||||||
| index 9f34e20..cca4d3a 100644
 |  | ||||||
| --- a/src/pcre2_match.c
 |  | ||||||
| +++ b/src/pcre2_match.c
 |  | ||||||
| @@ -6186,6 +6186,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
 |  | ||||||
|    } |  | ||||||
|  match_data->subject = NULL; |  | ||||||
|   |  | ||||||
| +/* Zero the error offset in case the first code unit is invalid UTF. */
 |  | ||||||
| +
 |  | ||||||
| +match_data->startchar = 0;
 |  | ||||||
| +
 |  | ||||||
|  /* If the pattern was successfully studied with JIT support, run the JIT |  | ||||||
|  executable instead of the rest of this function. Most options must be set at |  | ||||||
|  compile time for the JIT code to be usable. Fallback to the normal code path if |  | ||||||
| diff --git a/testdata/testinput10 b/testdata/testinput10
 |  | ||||||
| index 4399f82..19d2f2f 100644
 |  | ||||||
| --- a/testdata/testinput10
 |  | ||||||
| +++ b/testdata/testinput10
 |  | ||||||
| @@ -493,4 +493,10 @@
 |  | ||||||
|   |  | ||||||
|  /(?(á/utf |  | ||||||
|   |  | ||||||
| +/x/utf
 |  | ||||||
| +    abxyz
 |  | ||||||
| +    \x80\=startchar
 |  | ||||||
| +    abc\x80\=startchar
 |  | ||||||
| +    abc\x80\=startchar,offset=3
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput10 |  | ||||||
| diff --git a/testdata/testoutput10 b/testdata/testoutput10
 |  | ||||||
| index dfecda1..dd91c45 100644
 |  | ||||||
| --- a/testdata/testoutput10
 |  | ||||||
| +++ b/testdata/testoutput10
 |  | ||||||
| @@ -1651,4 +1651,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat
 |  | ||||||
|  /(?(á/utf |  | ||||||
|  Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) |  | ||||||
|   |  | ||||||
| +/x/utf
 |  | ||||||
| +    abxyz
 |  | ||||||
| + 0: x
 |  | ||||||
| +    \x80\=startchar
 |  | ||||||
| +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
 |  | ||||||
| +    abc\x80\=startchar
 |  | ||||||
| +Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
 |  | ||||||
| +    abc\x80\=startchar,offset=3
 |  | ||||||
| +Error -36 (bad UTF-8 offset)
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput10 |  | ||||||
| -- 
 |  | ||||||
| 2.21.0 |  | ||||||
| 
 |  | ||||||
| @ -1,134 +0,0 @@ | |||||||
| From 4c3e518bff94e5f206a63e3a1e5d7e570402786b Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Sat, 3 Aug 2019 08:30:40 +0000 |  | ||||||
| Subject: [PATCH] Fix incorrect computation of group length when one branch |  | ||||||
|  exceeded 65535. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1155 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_study.c    | 18 ++++++++++++------ |  | ||||||
|  testdata/testinput2  |  8 ++++++++ |  | ||||||
|  testdata/testoutput2 | 27 +++++++++++++++++++++++++++ |  | ||||||
|  3 files changed, 47 insertions(+), 6 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_study.c b/src/pcre2_study.c
 |  | ||||||
| index e883c2e..cb5e7f1 100644
 |  | ||||||
| --- a/src/pcre2_study.c
 |  | ||||||
| +++ b/src/pcre2_study.c
 |  | ||||||
| @@ -103,6 +103,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
 |  | ||||||
|    int *backref_cache) |  | ||||||
|  { |  | ||||||
|  int length = -1; |  | ||||||
| +int branchlength = 0;
 |  | ||||||
|  int prev_cap_recno = -1; |  | ||||||
|  int prev_cap_d = 0; |  | ||||||
|  int prev_recurse_recno = -1; |  | ||||||
| @@ -110,9 +111,9 @@ int prev_recurse_d = 0;
 |  | ||||||
|  uint32_t once_fudge = 0; |  | ||||||
|  BOOL had_recurse = FALSE; |  | ||||||
|  BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; |  | ||||||
| -recurse_check this_recurse;
 |  | ||||||
| -int branchlength = 0;
 |  | ||||||
| +PCRE2_SPTR nextbranch = code + GET(code, 1);
 |  | ||||||
|  PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; |  | ||||||
| +recurse_check this_recurse;
 |  | ||||||
|   |  | ||||||
|  /* If this is a "could be empty" group, its minimum length is 0. */ |  | ||||||
|   |  | ||||||
| @@ -128,16 +129,20 @@ if ((*countptr)++ > 1000) return -1;
 |  | ||||||
|   |  | ||||||
|  /* Scan along the opcodes for this branch. If we get to the end of the branch, |  | ||||||
|  check the length against that of the other branches. If the accumulated length |  | ||||||
| -passes 16-bits, stop. */
 |  | ||||||
| +passes 16-bits, reset to that value and skip the rest of the branch. */
 |  | ||||||
|   |  | ||||||
|  for (;;) |  | ||||||
|    { |  | ||||||
|    int d, min, recno; |  | ||||||
| -  PCRE2_UCHAR *cs, *ce;
 |  | ||||||
| -  PCRE2_UCHAR op = *cc;
 |  | ||||||
| +  PCRE2_UCHAR op, *cs, *ce;
 |  | ||||||
|   |  | ||||||
| -  if (branchlength >= UINT16_MAX) return UINT16_MAX;
 |  | ||||||
| +  if (branchlength >= UINT16_MAX)
 |  | ||||||
| +    {
 |  | ||||||
| +    branchlength = UINT16_MAX;
 |  | ||||||
| +    cc = (PCRE2_UCHAR *)nextbranch;
 |  | ||||||
| +    }
 |  | ||||||
|   |  | ||||||
| +  op = *cc;
 |  | ||||||
|    switch (op) |  | ||||||
|      { |  | ||||||
|      case OP_COND: |  | ||||||
| @@ -227,6 +232,7 @@ for (;;)
 |  | ||||||
|      if (length < 0 || (!had_recurse && branchlength < length)) |  | ||||||
|        length = branchlength; |  | ||||||
|      if (op != OP_ALT) return length; |  | ||||||
| +    nextbranch = cc + GET(cc, 1);
 |  | ||||||
|      cc += 1 + LINK_SIZE; |  | ||||||
|      branchlength = 0; |  | ||||||
|      had_recurse = FALSE; |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 1bfe591..384239a 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5603,4 +5603,12 @@ a)"xI
 |  | ||||||
|  # Expect error (recursion => not fixed length) |  | ||||||
|  /(\2)((?=(?<=\1)))/ |  | ||||||
|   |  | ||||||
| +/\A\s*(a|(?:[^`]{28500}){4})/I
 |  | ||||||
| +    a
 |  | ||||||
| +
 |  | ||||||
| +/\A\s*((?:[^`]{28500}){4})/I
 |  | ||||||
| +
 |  | ||||||
| +/\A\s*((?:[^`]{28500}){4}|a)/I
 |  | ||||||
| +    a
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index 758b4db..0983741 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16956,6 +16956,33 @@ No match
 |  | ||||||
|  /(\2)((?=(?<=\1)))/ |  | ||||||
|  Failed: error 125 at offset 8: lookbehind assertion is not fixed length |  | ||||||
|   |  | ||||||
| +/\A\s*(a|(?:[^`]{28500}){4})/I
 |  | ||||||
| +Capture group count = 1
 |  | ||||||
| +Max lookbehind = 1
 |  | ||||||
| +Compile options: <none>
 |  | ||||||
| +Overall options: anchored
 |  | ||||||
| +Subject length lower bound = 1
 |  | ||||||
| +    a
 |  | ||||||
| + 0: a
 |  | ||||||
| + 1: a
 |  | ||||||
| +
 |  | ||||||
| +/\A\s*((?:[^`]{28500}){4})/I
 |  | ||||||
| +Capture group count = 1
 |  | ||||||
| +Max lookbehind = 1
 |  | ||||||
| +Compile options: <none>
 |  | ||||||
| +Overall options: anchored
 |  | ||||||
| +Subject length lower bound = 65535
 |  | ||||||
| +
 |  | ||||||
| +/\A\s*((?:[^`]{28500}){4}|a)/I
 |  | ||||||
| +Capture group count = 1
 |  | ||||||
| +Max lookbehind = 1
 |  | ||||||
| +Compile options: <none>
 |  | ||||||
| +Overall options: anchored
 |  | ||||||
| +Subject length lower bound = 1
 |  | ||||||
| +    a
 |  | ||||||
| + 0: a
 |  | ||||||
| + 1: a
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,217 +0,0 @@ | |||||||
| From 44c8382acfe0902b302e0d7a5b1c6d9ee9226a51 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Tue, 16 Jul 2019 15:06:21 +0000 |  | ||||||
| Subject: [PATCH] Fix lookbehind within lookahead within lookbehind |  | ||||||
|  misbehaviour bug. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1133 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_compile.c  | 58 +++++++++++++++++++++++++++++--------------- |  | ||||||
|  testdata/testinput1  |  6 +++++ |  | ||||||
|  testdata/testinput2  |  3 +++ |  | ||||||
|  testdata/testoutput1 |  9 +++++++ |  | ||||||
|  testdata/testoutput2 |  4 +++ |  | ||||||
|  5 files changed, 61 insertions(+), 19 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
 |  | ||||||
| index f6e0a0b..2ae95ed 100644
 |  | ||||||
| --- a/src/pcre2_compile.c
 |  | ||||||
| +++ b/src/pcre2_compile.c
 |  | ||||||
| @@ -135,6 +135,8 @@ static BOOL
 |  | ||||||
|    set_lookbehind_lengths(uint32_t **, int *, int *, parsed_recurse_check *, |  | ||||||
|      compile_block *); |  | ||||||
|   |  | ||||||
| +static int
 |  | ||||||
| +  check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
 |  | ||||||
|   |  | ||||||
|   |  | ||||||
|  /************************************************* |  | ||||||
| @@ -8997,15 +8999,15 @@ for (;; pptr++)
 |  | ||||||
|        } |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
| -    /* Lookaheads can be ignored, but we must start the skip inside the group
 |  | ||||||
| -    so that it isn't treated as a group within the branch. */
 |  | ||||||
| +    /* Lookaheads do not contribute to the length of this branch, but they may
 |  | ||||||
| +    contain lookbehinds within them whose lengths need to be set. */
 |  | ||||||
|   |  | ||||||
|      case META_LOOKAHEAD: |  | ||||||
|      case META_LOOKAHEADNOT: |  | ||||||
| -    pptr = parsed_skip(pptr + 1, PSKIP_KET);
 |  | ||||||
| -    if (pptr == NULL) goto PARSED_SKIP_FAILED;
 |  | ||||||
| +    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
 |  | ||||||
| +    if (*errcodeptr != 0) return -1;
 |  | ||||||
|   |  | ||||||
| -    /* Also ignore any qualifiers that follow a lookahead assertion. */
 |  | ||||||
| +    /* Ignore any qualifiers that follow a lookahead assertion. */
 |  | ||||||
|   |  | ||||||
|      switch (pptr[1]) |  | ||||||
|        { |  | ||||||
| @@ -9319,20 +9321,28 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
 |  | ||||||
|  the error offset is marked unset. The enables the functions above not to |  | ||||||
|  override settings from deeper nestings. |  | ||||||
|   |  | ||||||
| -Arguments cb      points to the compile block
 |  | ||||||
| -Returns:          0 on success, or an errorcode (cb->erroroffset will be set)
 |  | ||||||
| +This function is called recursively from get_branchlength() for lookaheads in
 |  | ||||||
| +order to process any lookbehinds that they may contain. It stops when it hits a
 |  | ||||||
| +non-nested closing parenthesis in this case, returning a pointer to it.
 |  | ||||||
| +
 |  | ||||||
| +Arguments
 |  | ||||||
| +  pptr    points to where to start (start of pattern or start of lookahead)
 |  | ||||||
| +  retptr  if not NULL, return the ket pointer here
 |  | ||||||
| +  cb      points to the compile block
 |  | ||||||
| +
 |  | ||||||
| +Returns:  0 on success, or an errorcode (cb->erroroffset will be set)
 |  | ||||||
|  */ |  | ||||||
|   |  | ||||||
|  static int |  | ||||||
| -check_lookbehinds(compile_block *cb)
 |  | ||||||
| +check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
 |  | ||||||
|  { |  | ||||||
| -uint32_t *pptr;
 |  | ||||||
|  int errorcode = 0; |  | ||||||
|  int loopcount = 0; |  | ||||||
| +int nestlevel = 0;
 |  | ||||||
|   |  | ||||||
|  cb->erroroffset = PCRE2_UNSET; |  | ||||||
|   |  | ||||||
| -for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
 |  | ||||||
| +for (; *pptr != META_END; pptr++)
 |  | ||||||
|    { |  | ||||||
|    if (*pptr < META_END) continue;  /* Literal */ |  | ||||||
|   |  | ||||||
| @@ -9346,14 +9356,30 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
 |  | ||||||
|        pptr += 1; |  | ||||||
|      break; |  | ||||||
|   |  | ||||||
| +    case META_KET:
 |  | ||||||
| +    if (--nestlevel < 0)
 |  | ||||||
| +      {
 |  | ||||||
| +      if (retptr != NULL) *retptr = pptr;
 |  | ||||||
| +      return 0;
 |  | ||||||
| +      }
 |  | ||||||
| +    break;
 |  | ||||||
| +
 |  | ||||||
| +    case META_ATOMIC:
 |  | ||||||
| +    case META_CAPTURE:
 |  | ||||||
| +    case META_COND_ASSERT:
 |  | ||||||
| +    case META_LOOKAHEAD:
 |  | ||||||
| +    case META_LOOKAHEADNOT:
 |  | ||||||
| +    case META_NOCAPTURE:
 |  | ||||||
| +    case META_SCRIPT_RUN:
 |  | ||||||
| +    nestlevel++;
 |  | ||||||
| +    break;
 |  | ||||||
| +
 |  | ||||||
|      case META_ACCEPT: |  | ||||||
|      case META_ALT: |  | ||||||
|      case META_ASTERISK: |  | ||||||
|      case META_ASTERISK_PLUS: |  | ||||||
|      case META_ASTERISK_QUERY: |  | ||||||
| -    case META_ATOMIC:
 |  | ||||||
|      case META_BACKREF: |  | ||||||
| -    case META_CAPTURE:
 |  | ||||||
|      case META_CIRCUMFLEX: |  | ||||||
|      case META_CLASS: |  | ||||||
|      case META_CLASS_EMPTY: |  | ||||||
| @@ -9361,14 +9387,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
 |  | ||||||
|      case META_CLASS_END: |  | ||||||
|      case META_CLASS_NOT: |  | ||||||
|      case META_COMMIT: |  | ||||||
| -    case META_COND_ASSERT:
 |  | ||||||
|      case META_DOLLAR: |  | ||||||
|      case META_DOT: |  | ||||||
|      case META_FAIL: |  | ||||||
| -    case META_KET:
 |  | ||||||
| -    case META_LOOKAHEAD:
 |  | ||||||
| -    case META_LOOKAHEADNOT:
 |  | ||||||
| -    case META_NOCAPTURE:
 |  | ||||||
|      case META_PLUS: |  | ||||||
|      case META_PLUS_PLUS: |  | ||||||
|      case META_PLUS_QUERY: |  | ||||||
| @@ -9378,7 +9399,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
 |  | ||||||
|      case META_QUERY_QUERY: |  | ||||||
|      case META_RANGE_ESCAPED: |  | ||||||
|      case META_RANGE_LITERAL: |  | ||||||
| -    case META_SCRIPT_RUN:
 |  | ||||||
|      case META_SKIP: |  | ||||||
|      case META_THEN: |  | ||||||
|      break; |  | ||||||
| @@ -9879,7 +9899,7 @@ lengths. */
 |  | ||||||
|   |  | ||||||
|  if (has_lookbehind) |  | ||||||
|    { |  | ||||||
| -  errorcode = check_lookbehinds(&cb);
 |  | ||||||
| +  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
 |  | ||||||
|    if (errorcode != 0) goto HAD_CB_ERROR; |  | ||||||
|    } |  | ||||||
|   |  | ||||||
| diff --git a/testdata/testinput1 b/testdata/testinput1
 |  | ||||||
| index 4d9ec5a..ee9354b 100644
 |  | ||||||
| --- a/testdata/testinput1
 |  | ||||||
| +++ b/testdata/testinput1
 |  | ||||||
| @@ -6365,4 +6365,10 @@ ef) x/x,mark
 |  | ||||||
|  /(?(DEFINE)(a|ab))(?1){1}+c/ |  | ||||||
|      abc     |  | ||||||
|   |  | ||||||
| +/(?<=(?=.(?<=x)))/aftertext
 |  | ||||||
| +    abx
 |  | ||||||
| +
 |  | ||||||
| +/(?<=(?=(?<=a)))b/
 |  | ||||||
| +    ab
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 |  | ||||||
| index 9412bf6..d85fc5f 100644
 |  | ||||||
| --- a/testdata/testinput2
 |  | ||||||
| +++ b/testdata/testinput2
 |  | ||||||
| @@ -5597,4 +5597,7 @@ a)"xI
 |  | ||||||
|  # Multiplication overflow |  | ||||||
|  /(X{65535})(?<=\1{32770})/ |  | ||||||
|   |  | ||||||
| +/(?<=(?=.(?<=x)))/
 |  | ||||||
| +    ab\=ph
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
| diff --git a/testdata/testoutput1 b/testdata/testoutput1
 |  | ||||||
| index fffb8ec..c9bfea8 100644
 |  | ||||||
| --- a/testdata/testoutput1
 |  | ||||||
| +++ b/testdata/testoutput1
 |  | ||||||
| @@ -10081,4 +10081,13 @@ No match
 |  | ||||||
|      abc     |  | ||||||
|  No match |  | ||||||
|   |  | ||||||
| +/(?<=(?=.(?<=x)))/aftertext
 |  | ||||||
| +    abx
 |  | ||||||
| + 0: 
 |  | ||||||
| + 0+ x
 |  | ||||||
| +
 |  | ||||||
| +/(?<=(?=(?<=a)))b/
 |  | ||||||
| +    ab
 |  | ||||||
| + 0: b
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput1  |  | ||||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 |  | ||||||
| index 950095f..6405e26 100644
 |  | ||||||
| --- a/testdata/testoutput2
 |  | ||||||
| +++ b/testdata/testoutput2
 |  | ||||||
| @@ -16948,6 +16948,10 @@ Failed: error 187 at offset 15: lookbehind assertion is too long
 |  | ||||||
|  /(X{65535})(?<=\1{32770})/ |  | ||||||
|  Failed: error 187 at offset 10: lookbehind assertion is too long |  | ||||||
|   |  | ||||||
| +/(?<=(?=.(?<=x)))/
 |  | ||||||
| +    ab\=ph
 |  | ||||||
| +No match
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput2 |  | ||||||
|  Error -70: PCRE2_ERROR_BADDATA (unknown error number) |  | ||||||
|  Error -62: bad serialized data |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,130 +0,0 @@ | |||||||
| From 427e9b2fffc46d6b49a31df34c8b120bffc2ea60 Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Wed, 26 Jun 2019 16:13:28 +0000 |  | ||||||
| Subject: [PATCH] Fix partial matching bug in pcre2_dfa_match(). |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1123 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| ---
 |  | ||||||
|  src/pcre2_dfa_match.c | 11 ++++----- |  | ||||||
|  testdata/testinput6   | 22 +++++++++++++++++ |  | ||||||
|  testdata/testoutput6  | 36 ++++++++++++++++++++++++++++ |  | ||||||
| 
 |  | ||||||
| diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
 |  | ||||||
| index 911e9b9..538d15d 100644
 |  | ||||||
| --- a/src/pcre2_dfa_match.c
 |  | ||||||
| +++ b/src/pcre2_dfa_match.c
 |  | ||||||
| @@ -3152,8 +3152,8 @@ for (;;)
 |  | ||||||
|   |  | ||||||
|    /* We have finished the processing at the current subject character. If no |  | ||||||
|    new states have been set for the next character, we have found all the |  | ||||||
| -  matches that we are going to find. If we are at the top level and partial
 |  | ||||||
| -  matching has been requested, check for appropriate conditions.
 |  | ||||||
| +  matches that we are going to find. If partial matching has been requested,
 |  | ||||||
| +  check for appropriate conditions.
 |  | ||||||
|   |  | ||||||
|    The "forced_ fail" variable counts the number of (*F) encountered for the |  | ||||||
|    character. If it is equal to the original active_count (saved in |  | ||||||
| @@ -3165,8 +3165,7 @@ for (;;)
 |  | ||||||
|   |  | ||||||
|    if (new_count <= 0) |  | ||||||
|      { |  | ||||||
| -    if (rlevel == 1 &&                               /* Top level, and */
 |  | ||||||
| -        could_continue &&                            /* Some could go on, and */
 |  | ||||||
| +    if (could_continue &&                            /* Some could go on, and */
 |  | ||||||
|          forced_fail != workspace[1] &&               /* Not all forced fail & */ |  | ||||||
|          (                                            /* either... */ |  | ||||||
|          (mb->moptions & PCRE2_PARTIAL_HARD) != 0      /* Hard partial */ |  | ||||||
| @@ -3175,8 +3174,8 @@ for (;;)
 |  | ||||||
|           match_count < 0)                            /* no matches */ |  | ||||||
|          ) &&                                         /* And... */ |  | ||||||
|          ( |  | ||||||
| -        partial_newline ||                           /* Either partial NL */
 |  | ||||||
| -          (                                          /* or ... */
 |  | ||||||
| +        partial_newline ||                     /* Either partial NL */
 |  | ||||||
| +          (                                    /* or ... */
 |  | ||||||
|            ptr >= end_subject &&                /* End of subject and */ |  | ||||||
|            ptr > mb->start_used_ptr)            /* Inspected non-empty string */ |  | ||||||
|            ) |  | ||||||
| diff --git a/testdata/testinput6 b/testdata/testinput6
 |  | ||||||
| index 403e3fa..cc3ebd0 100644
 |  | ||||||
| --- a/testdata/testinput6
 |  | ||||||
| +++ b/testdata/testinput6
 |  | ||||||
| @@ -4972,4 +4972,26 @@
 |  | ||||||
|  \= Expect no match |  | ||||||
|      0 |  | ||||||
|   |  | ||||||
| +/(?<=pqr)abc(?=xyz)/
 |  | ||||||
| +    123pqrabcxy\=ps,allusedtext
 |  | ||||||
| +    123pqrabcxyz\=ps,allusedtext
 |  | ||||||
| +
 |  | ||||||
| +/(?>a+b)/
 |  | ||||||
| +    aaaa\=ps
 |  | ||||||
| +    aaaab\=ps
 |  | ||||||
| +    
 |  | ||||||
| +/(abc)(?1)/
 |  | ||||||
| +    abca\=ps
 |  | ||||||
| +    abcabc\=ps
 |  | ||||||
| +
 |  | ||||||
| +/(?(?=abc).*|Z)/
 |  | ||||||
| +    ab\=ps
 |  | ||||||
| +    abcxyz\=ps
 |  | ||||||
| +
 |  | ||||||
| +/(abc)++x/
 |  | ||||||
| +    abcab\=ps
 |  | ||||||
| +    abc\=ps 
 |  | ||||||
| +    ab\=ps
 |  | ||||||
| +    abcx  
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput6 |  | ||||||
| diff --git a/testdata/testoutput6 b/testdata/testoutput6
 |  | ||||||
| index 6a975dd..61cbfe2 100644
 |  | ||||||
| --- a/testdata/testoutput6
 |  | ||||||
| +++ b/testdata/testoutput6
 |  | ||||||
| @@ -7809,4 +7809,40 @@ No match
 |  | ||||||
|      0 |  | ||||||
|  No match |  | ||||||
|   |  | ||||||
| +/(?<=pqr)abc(?=xyz)/
 |  | ||||||
| +    123pqrabcxy\=ps,allusedtext
 |  | ||||||
| +Partial match: pqrabcxy
 |  | ||||||
| +               <<<
 |  | ||||||
| +    123pqrabcxyz\=ps,allusedtext
 |  | ||||||
| + 0: pqrabcxyz
 |  | ||||||
| +    <<<   >>>
 |  | ||||||
| +
 |  | ||||||
| +/(?>a+b)/
 |  | ||||||
| +    aaaa\=ps
 |  | ||||||
| +Partial match: aaaa
 |  | ||||||
| +    aaaab\=ps
 |  | ||||||
| + 0: aaaab
 |  | ||||||
| +    
 |  | ||||||
| +/(abc)(?1)/
 |  | ||||||
| +    abca\=ps
 |  | ||||||
| +Partial match: abca
 |  | ||||||
| +    abcabc\=ps
 |  | ||||||
| + 0: abcabc
 |  | ||||||
| +
 |  | ||||||
| +/(?(?=abc).*|Z)/
 |  | ||||||
| +    ab\=ps
 |  | ||||||
| +Partial match: ab
 |  | ||||||
| +    abcxyz\=ps
 |  | ||||||
| + 0: abcxyz
 |  | ||||||
| +
 |  | ||||||
| +/(abc)++x/
 |  | ||||||
| +    abcab\=ps
 |  | ||||||
| +Partial match: abcab
 |  | ||||||
| +    abc\=ps 
 |  | ||||||
| +Partial match: abc
 |  | ||||||
| +    ab\=ps
 |  | ||||||
| +Partial match: ab
 |  | ||||||
| +    abcx  
 |  | ||||||
| + 0: abcx
 |  | ||||||
| +
 |  | ||||||
|  # End of testinput6 |  | ||||||
| -- 
 |  | ||||||
| 2.20.1 |  | ||||||
| 
 |  | ||||||
| @ -1,382 +0,0 @@ | |||||||
| From e29388de53ea3a4f9d1c6b4932613681493ac9dc Mon Sep 17 00:00:00 2001 |  | ||||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> |  | ||||||
| Date: Sat, 15 Jun 2019 15:51:07 +0000 |  | ||||||
| Subject: [PATCH] Fix pcre2grep -o bug when ovector overflows; add option to |  | ||||||
|  adjust the limit; raise the default limit; give error if -o requests an |  | ||||||
|  uncaptured parens. |  | ||||||
| MIME-Version: 1.0 |  | ||||||
| Content-Type: text/plain; charset=UTF-8 |  | ||||||
| Content-Transfer-Encoding: 8bit |  | ||||||
| 
 |  | ||||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1106 6239d852-aaf2-0410-a92c-79f79f948069 |  | ||||||
| Petr Písař: Ported to 10.33. |  | ||||||
| 
 |  | ||||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> |  | ||||||
| ---
 |  | ||||||
|  RunGrepTest             |  7 ++++++ |  | ||||||
|  doc/html/pcre2api.html  | 12 +++++----- |  | ||||||
|  doc/html/pcre2grep.html | 28 +++++++++++++++------- |  | ||||||
|  doc/html/pcre2test.html |  4 +++- |  | ||||||
|  doc/pcre2grep.1         | 26 +++++++++++++------- |  | ||||||
|  doc/pcre2grep.txt       | 43 ++++++++++++++++++++------------- |  | ||||||
|  doc/pcre2test.txt       |  4 +++- |  | ||||||
|  src/pcre2grep.c         | 53 ++++++++++++++++++++++++++++------------- |  | ||||||
|  testdata/grepoutput     |  7 ++++++ |  | ||||||
|  9 files changed, 126 insertions(+), 58 deletions(-) |  | ||||||
| 
 |  | ||||||
| diff --git a/RunGrepTest b/RunGrepTest
 |  | ||||||
| index bac1f1b..ea37f70 100755
 |  | ||||||
| --- a/RunGrepTest
 |  | ||||||
| +++ b/RunGrepTest
 |  | ||||||
| @@ -653,6 +653,13 @@ printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
 |  | ||||||
|  $valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep |  | ||||||
|  echo "RC=$?" >>testtrygrep |  | ||||||
|   |  | ||||||
| +echo "---------------------------- Test 127 -----------------------------" >>testtrygrep
 |  | ||||||
| +(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep
 |  | ||||||
| +echo "RC=$?" >>testtrygrep
 |  | ||||||
| +
 |  | ||||||
| +echo "---------------------------- Test 128 -----------------------------" >>testtrygrep
 |  | ||||||
| +(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
 |  | ||||||
| +echo "RC=$?" >>testtrygrep
 |  | ||||||
|   |  | ||||||
|  # Now compare the results. |  | ||||||
|   |  | ||||||
| diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
 |  | ||||||
| index 7ca39f5..84f4442 100644
 |  | ||||||
| --- a/doc/html/pcre2api.html
 |  | ||||||
| +++ b/doc/html/pcre2api.html
 |  | ||||||
| @@ -2252,12 +2252,12 @@ segment.
 |  | ||||||
|    PCRE2_INFO_MINLENGTH |  | ||||||
|  </pre> |  | ||||||
|  If a minimum length for matching subject strings was computed, its value is |  | ||||||
| -returned. Otherwise the returned value is 0. The value is a number of
 |  | ||||||
| -characters, which in UTF mode may be different from the number of code units.
 |  | ||||||
| -The third argument should point to an <b>uint32_t</b> variable. The value is a
 |  | ||||||
| -lower bound to the length of any matching string. There may not be any strings
 |  | ||||||
| -of that length that do actually match, but every string that does match is at
 |  | ||||||
| -least that long.
 |  | ||||||
| +returned. Otherwise the returned value is 0. This value is not computed when
 |  | ||||||
| +PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in
 |  | ||||||
| +UTF mode may be different from the number of code units. The third argument
 |  | ||||||
| +should point to an <b>uint32_t</b> variable. The value is a lower bound to the
 |  | ||||||
| +length of any matching string. There may not be any strings of that length that
 |  | ||||||
| +do actually match, but every string that does match is at least that long.
 |  | ||||||
|  <pre> |  | ||||||
|    PCRE2_INFO_NAMECOUNT |  | ||||||
|    PCRE2_INFO_NAMEENTRYSIZE |  | ||||||
| diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
 |  | ||||||
| index d66cee3..de699e7 100644
 |  | ||||||
| --- a/doc/html/pcre2grep.html
 |  | ||||||
| +++ b/doc/html/pcre2grep.html
 |  | ||||||
| @@ -685,20 +685,32 @@ otherwise empty line. This option is mutually exclusive with <b>--output</b>,
 |  | ||||||
|  <P> |  | ||||||
|  <b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i> |  | ||||||
|  Show only the part of the line that matched the capturing parentheses of the |  | ||||||
| -given number. Up to 32 capturing parentheses are supported, and -o0 is
 |  | ||||||
| -equivalent to <b>-o</b> without a number. Because these options can be given
 |  | ||||||
| -without an argument (see above), if an argument is present, it must be given in
 |  | ||||||
| -the same shell item, for example, -o3 or --only-matching=2. The comments given
 |  | ||||||
| -for the non-argument case above also apply to this option. If the specified
 |  | ||||||
| -capturing parentheses do not exist in the pattern, or were not set in the
 |  | ||||||
| -match, nothing is output unless the file name or line number are being output.
 |  | ||||||
| +given number. Up to 50 capturing parentheses are supported by default. This
 |  | ||||||
| +limit can be changed via the <b>--om-capture</b> option. A pattern may contain
 |  | ||||||
| +any number of capturing parentheses, but only those whose number is within the
 |  | ||||||
| +limit can be accessed by <b>-o</b>. An error occurs if the number specified by
 |  | ||||||
| +<b>-o</b> is greater than the limit.
 |  | ||||||
| +<br>
 |  | ||||||
| +<br>
 |  | ||||||
| +-o0 is the same as <b>-o</b> without a number. Because these options can be
 |  | ||||||
| +given without an argument (see above), if an argument is present, it must be
 |  | ||||||
| +given in the same shell item, for example, -o3 or --only-matching=2. The
 |  | ||||||
| +comments given for the non-argument case above also apply to this option. If
 |  | ||||||
| +the specified capturing parentheses do not exist in the pattern, or were not
 |  | ||||||
| +set in the match, nothing is output unless the file name or line number are
 |  | ||||||
| +being output.
 |  | ||||||
|  <br> |  | ||||||
|  <br> |  | ||||||
|  If this option is given multiple times, multiple substrings are output for each |  | ||||||
|  match, in the order the options are given, and all on one line. For example, |  | ||||||
|  -o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and |  | ||||||
|  then 3 again to be output. By default, there is no separator (but see the next |  | ||||||
| -option).
 |  | ||||||
| +but one option).
 |  | ||||||
| +</P>
 |  | ||||||
| +<P>
 |  | ||||||
| +<b>--om-capture</b>=<i>number</i>
 |  | ||||||
| +Set the number of capturing parentheses that can be accessed by <b>-o</b>. The 
 |  | ||||||
| +default is 50.
 |  | ||||||
|  </P> |  | ||||||
|  <P> |  | ||||||
|  <b>--om-separator</b>=<i>text</i> |  | ||||||
| diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
 |  | ||||||
| index 083d5cc..4be47c6 100644
 |  | ||||||
| --- a/doc/html/pcre2test.html
 |  | ||||||
| +++ b/doc/html/pcre2test.html
 |  | ||||||
| @@ -738,7 +738,9 @@ options, the line is omitted. "First code unit" is where any match must start;
 |  | ||||||
|  if there is more than one they are listed as "starting code units". "Last code |  | ||||||
|  unit" is the last literal code unit that must be present in any match. This is |  | ||||||
|  not necessarily the last character. These lines are omitted if no starting or |  | ||||||
| -ending code units are recorded.
 |  | ||||||
| +ending code units are recorded. The subject length line is omitted when 
 |  | ||||||
| +<b>no_start_optimize</b> is set because the minimum length is not calculated 
 |  | ||||||
| +when it can never be used.
 |  | ||||||
|  </P> |  | ||||||
|  <P> |  | ||||||
|  The <b>framesize</b> modifier shows the size, in bytes, of the storage frames |  | ||||||
| diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
 |  | ||||||
| index 6b3219b..1dcdb68 100644
 |  | ||||||
| --- a/doc/pcre2grep.1
 |  | ||||||
| +++ b/doc/pcre2grep.1
 |  | ||||||
| @@ -596,19 +596,29 @@ otherwise empty line. This option is mutually exclusive with \fB--output\fP,
 |  | ||||||
|  .TP |  | ||||||
|  \fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP |  | ||||||
|  Show only the part of the line that matched the capturing parentheses of the |  | ||||||
| -given number. Up to 32 capturing parentheses are supported, and -o0 is
 |  | ||||||
| -equivalent to \fB-o\fP without a number. Because these options can be given
 |  | ||||||
| -without an argument (see above), if an argument is present, it must be given in
 |  | ||||||
| -the same shell item, for example, -o3 or --only-matching=2. The comments given
 |  | ||||||
| -for the non-argument case above also apply to this option. If the specified
 |  | ||||||
| -capturing parentheses do not exist in the pattern, or were not set in the
 |  | ||||||
| -match, nothing is output unless the file name or line number are being output.
 |  | ||||||
| +given number. Up to 50 capturing parentheses are supported by default. This
 |  | ||||||
| +limit can be changed via the \fB--om-capture\fP option. A pattern may contain
 |  | ||||||
| +any number of capturing parentheses, but only those whose number is within the
 |  | ||||||
| +limit can be accessed by \fB-o\fP. An error occurs if the number specified by
 |  | ||||||
| +\fB-o\fP is greater than the limit.
 |  | ||||||
| +.sp
 |  | ||||||
| +-o0 is the same as \fB-o\fP without a number. Because these options can be
 |  | ||||||
| +given without an argument (see above), if an argument is present, it must be
 |  | ||||||
| +given in the same shell item, for example, -o3 or --only-matching=2. The
 |  | ||||||
| +comments given for the non-argument case above also apply to this option. If
 |  | ||||||
| +the specified capturing parentheses do not exist in the pattern, or were not
 |  | ||||||
| +set in the match, nothing is output unless the file name or line number are
 |  | ||||||
| +being output.
 |  | ||||||
|  .sp |  | ||||||
|  If this option is given multiple times, multiple substrings are output for each |  | ||||||
|  match, in the order the options are given, and all on one line. For example, |  | ||||||
|  -o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and |  | ||||||
|  then 3 again to be output. By default, there is no separator (but see the next |  | ||||||
| -option).
 |  | ||||||
| +but one option).
 |  | ||||||
| +.TP
 |  | ||||||
| +\fB--om-capture\fP=\fInumber\fP
 |  | ||||||
| +Set the number of capturing parentheses that can be accessed by \fB-o\fP. The 
 |  | ||||||
| +default is 50.
 |  | ||||||
|  .TP |  | ||||||
|  \fB--om-separator\fP=\fItext\fP |  | ||||||
|  Specify a separating string for multiple occurrences of \fB-o\fP. The default |  | ||||||
| diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt
 |  | ||||||
| index cd44fe0..2920643 100644
 |  | ||||||
| --- a/doc/pcre2grep.txt
 |  | ||||||
| +++ b/doc/pcre2grep.txt
 |  | ||||||
| @@ -662,23 +662,32 @@ OPTIONS
 |  | ||||||
|   |  | ||||||
|         -onumber, --only-matching=number |  | ||||||
|                   Show only the part of the line  that  matched  the  capturing |  | ||||||
| -                 parentheses of the given number. Up to 32 capturing parenthe-
 |  | ||||||
| -                 ses are supported, and -o0 is equivalent to -o without a num-
 |  | ||||||
| -                 ber.  Because  these options can be given without an argument
 |  | ||||||
| -                 (see above), if an argument is present, it must be  given  in
 |  | ||||||
| -                 the  same  shell item, for example, -o3 or --only-matching=2.
 |  | ||||||
| -                 The comments given for the non-argument case above also apply
 |  | ||||||
| -                 to this option. If the specified capturing parentheses do not
 |  | ||||||
| -                 exist in the pattern, or were not set in the  match,  nothing
 |  | ||||||
| -                 is  output unless the file name or line number are being out-
 |  | ||||||
| -                 put.
 |  | ||||||
| -
 |  | ||||||
| -                 If this option is given multiple times,  multiple  substrings
 |  | ||||||
| -                 are  output  for  each  match,  in  the order the options are
 |  | ||||||
| -                 given, and all on one line. For example, -o3 -o1  -o3  causes
 |  | ||||||
| -                 the  substrings  matched by capturing parentheses 3 and 1 and
 |  | ||||||
| -                 then 3 again to be output. By default, there is no  separator
 |  | ||||||
| -                 (but see the next option).
 |  | ||||||
| +                 parentheses of the given number. Up to 50 capturing parenthe-
 |  | ||||||
| +                 ses are supported by default. This limit can be  changed  via
 |  | ||||||
| +                 the  --om-capture option. A pattern may contain any number of
 |  | ||||||
| +                 capturing parentheses, but only those whose number is  within
 |  | ||||||
| +                 the  limit can be accessed by -o. An error occurs if the num-
 |  | ||||||
| +                 ber specified by -o is greater than the limit.
 |  | ||||||
| +
 |  | ||||||
| +                 -o0 is the same as -o without a number. Because these options
 |  | ||||||
| +                 can  be given without an argument (see above), if an argument
 |  | ||||||
| +                 is present, it must be given in  the  same  shell  item,  for
 |  | ||||||
| +                 example, -o3 or --only-matching=2. The comments given for the
 |  | ||||||
| +                 non-argument case above also apply to  this  option.  If  the
 |  | ||||||
| +                 specified  capturing parentheses do not exist in the pattern,
 |  | ||||||
| +                 or were not set in the match, nothing is  output  unless  the
 |  | ||||||
| +                 file name or line number are being output.
 |  | ||||||
| +
 |  | ||||||
| +                 If  this  option is given multiple times, multiple substrings
 |  | ||||||
| +                 are output for each match,  in  the  order  the  options  are
 |  | ||||||
| +                 given,  and  all on one line. For example, -o3 -o1 -o3 causes
 |  | ||||||
| +                 the substrings matched by capturing parentheses 3 and  1  and
 |  | ||||||
| +                 then  3 again to be output. By default, there is no separator
 |  | ||||||
| +                 (but see the next but one option).
 |  | ||||||
| +
 |  | ||||||
| +       --om-capture=number
 |  | ||||||
| +                 Set the number of capturing parentheses that can be  accessed
 |  | ||||||
| +                 by -o. The default is 50.
 |  | ||||||
|   |  | ||||||
|         --om-separator=text |  | ||||||
|                   Specify  a  separating string for multiple occurrences of -o. |  | ||||||
| diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt
 |  | ||||||
| index cbe3528..f287f6d 100644
 |  | ||||||
| --- a/doc/pcre2test.txt
 |  | ||||||
| +++ b/doc/pcre2test.txt
 |  | ||||||
| @@ -669,7 +669,9 @@ PATTERN MODIFIERS
 |  | ||||||
|         as "starting code units". "Last code unit" is  the  last  literal  code |  | ||||||
|         unit  that  must  be  present in any match. This is not necessarily the |  | ||||||
|         last character. These lines are omitted if no starting or  ending  code |  | ||||||
| -       units are recorded.
 |  | ||||||
| +       units   are   recorded.   The  subject  length  line  is  omitted  when
 |  | ||||||
| +       no_start_optimize is set because the minimum length is  not  calculated
 |  | ||||||
| +       when it can never be used.
 |  | ||||||
|   |  | ||||||
|         The  framesize modifier shows the size, in bytes, of the storage frames |  | ||||||
|         used by pcre2_match() for handling backtracking. The  size  depends  on |  | ||||||
| diff --git a/src/pcre2grep.c b/src/pcre2grep.c
 |  | ||||||
| index a3cc3ec..d17cd2a 100644
 |  | ||||||
| --- a/src/pcre2grep.c
 |  | ||||||
| +++ b/src/pcre2grep.c
 |  | ||||||
| @@ -115,7 +115,7 @@ MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
 |  | ||||||
|   |  | ||||||
|  typedef int BOOL; |  | ||||||
|   |  | ||||||
| -#define OFFSET_SIZE 33
 |  | ||||||
| +#define DEFAULT_CAPTURE_MAX 50
 |  | ||||||
|   |  | ||||||
|  #if BUFSIZ > 8192 |  | ||||||
|  #define MAXPATLEN BUFSIZ |  | ||||||
| @@ -242,6 +242,8 @@ static pcre2_compile_context *compile_context;
 |  | ||||||
|  static pcre2_match_context *match_context; |  | ||||||
|  static pcre2_match_data *match_data; |  | ||||||
|  static PCRE2_SIZE *offsets; |  | ||||||
| +static uint32_t offset_size;
 |  | ||||||
| +static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
 |  | ||||||
|   |  | ||||||
|  static BOOL count_only = FALSE; |  | ||||||
|  static BOOL do_colour = FALSE; |  | ||||||
| @@ -391,6 +393,7 @@ used to identify them. */
 |  | ||||||
|  #define N_INCLUDE_FROM (-21) |  | ||||||
|  #define N_OM_SEPARATOR (-22) |  | ||||||
|  #define N_MAX_BUFSIZE  (-23) |  | ||||||
| +#define N_OM_CAPTURE   (-24)
 |  | ||||||
|   |  | ||||||
|  static option_item optionlist[] = { |  | ||||||
|    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" }, |  | ||||||
| @@ -437,6 +440,7 @@ static option_item optionlist[] = {
 |  | ||||||
|    { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" }, |  | ||||||
|    { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, |  | ||||||
|    { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, |  | ||||||
| +  { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
 |  | ||||||
|    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" }, |  | ||||||
|    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" }, |  | ||||||
|    { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" }, |  | ||||||
| @@ -2568,7 +2572,7 @@ while (ptr < endptr)
 |  | ||||||
|   |  | ||||||
|        for (i = 0; i < jfriedl_XR; i++) |  | ||||||
|            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, |  | ||||||
| -              PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
 |  | ||||||
| +              PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
 |  | ||||||
|   |  | ||||||
|        if (gettimeofday(&end_time, &dummy) != 0) |  | ||||||
|                perror("bad gettimeofday"); |  | ||||||
| @@ -2688,7 +2692,7 @@ while (ptr < endptr)
 |  | ||||||
|            for (om = only_matching; om != NULL; om = om->next) |  | ||||||
|              { |  | ||||||
|              int n = om->groupnum; |  | ||||||
| -            if (n < mrc)
 |  | ||||||
| +            if (n == 0 || n < mrc)
 |  | ||||||
|                { |  | ||||||
|                int plen = offsets[2*n + 1] - offsets[2*n]; |  | ||||||
|                if (plen > 0) |  | ||||||
| @@ -3639,6 +3643,7 @@ int rc = 1;
 |  | ||||||
|  BOOL only_one_at_top; |  | ||||||
|  patstr *cp; |  | ||||||
|  fnstr *fn; |  | ||||||
| +omstr *om;
 |  | ||||||
|  const char *locale_from = "--locale"; |  | ||||||
|   |  | ||||||
|  #ifdef SUPPORT_PCRE2GREP_JIT |  | ||||||
| @@ -3655,20 +3660,6 @@ must use STDOUT_NL to terminate lines. */
 |  | ||||||
|  _setmode(_fileno(stdout), _O_BINARY); |  | ||||||
|  #endif |  | ||||||
|   |  | ||||||
| -/* Set up a default compile and match contexts and a match data block. */
 |  | ||||||
| -
 |  | ||||||
| -compile_context = pcre2_compile_context_create(NULL);
 |  | ||||||
| -match_context = pcre2_match_context_create(NULL);
 |  | ||||||
| -match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
 |  | ||||||
| -offsets = pcre2_get_ovector_pointer(match_data);
 |  | ||||||
| -
 |  | ||||||
| -/* If string (script) callouts are supported, set up the callout processing
 |  | ||||||
| -function. */
 |  | ||||||
| -
 |  | ||||||
| -#ifdef SUPPORT_PCRE2GREP_CALLOUT
 |  | ||||||
| -pcre2_set_callout(match_context, pcre2grep_callout, NULL);
 |  | ||||||
| -#endif
 |  | ||||||
| -
 |  | ||||||
|  /* Process the options */ |  | ||||||
|   |  | ||||||
|  for (i = 1; i < argc; i++) |  | ||||||
| @@ -4015,12 +4006,40 @@ if (only_matching_count > 1)
 |  | ||||||
|    pcre2grep_exit(usage(2)); |  | ||||||
|    } |  | ||||||
|   |  | ||||||
| +/* Check that there is a big enough ovector for all -o settings. */
 |  | ||||||
| +
 |  | ||||||
| +for (om = only_matching; om != NULL; om = om->next)
 |  | ||||||
| +  {
 |  | ||||||
| +  int n = om->groupnum;
 |  | ||||||
| +  if (n > (int)capture_max)
 |  | ||||||
| +    {
 |  | ||||||
| +    fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
 |  | ||||||
| +    fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
 |  | ||||||
| +    goto EXIT2;
 |  | ||||||
| +    }
 |  | ||||||
| +  }
 |  | ||||||
| +
 |  | ||||||
|  /* Check the text supplied to --output for errors. */ |  | ||||||
|   |  | ||||||
|  if (output_text != NULL && |  | ||||||
|      !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE)) |  | ||||||
|    goto EXIT2; |  | ||||||
|   |  | ||||||
| +/* Set up default compile and match contexts and a match data block. */
 |  | ||||||
| +
 |  | ||||||
| +offset_size = capture_max + 1;
 |  | ||||||
| +compile_context = pcre2_compile_context_create(NULL);
 |  | ||||||
| +match_context = pcre2_match_context_create(NULL);
 |  | ||||||
| +match_data = pcre2_match_data_create(offset_size, NULL);
 |  | ||||||
| +offsets = pcre2_get_ovector_pointer(match_data);
 |  | ||||||
| +
 |  | ||||||
| +/* If string (script) callouts are supported, set up the callout processing
 |  | ||||||
| +function. */
 |  | ||||||
| +
 |  | ||||||
| +#ifdef SUPPORT_PCRE2GREP_CALLOUT
 |  | ||||||
| +pcre2_set_callout(match_context, pcre2grep_callout, NULL);
 |  | ||||||
| +#endif
 |  | ||||||
| +
 |  | ||||||
|  /* Put limits into the match data block. */ |  | ||||||
|   |  | ||||||
|  if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit); |  | ||||||
| diff --git a/testdata/grepoutput b/testdata/grepoutput
 |  | ||||||
| index 2bd69be..a9297e1 100644
 |  | ||||||
| --- a/testdata/grepoutput
 |  | ||||||
| +++ b/testdata/grepoutput
 |  | ||||||
| @@ -949,3 +949,10 @@ RC=0
 |  | ||||||
|  ---------------------------- Test 126 ----------------------------- |  | ||||||
|  ABC | |||||||