Fix matching repeated character classes against an 8-bit string containing multi-code-unit characters
This commit is contained in:
		
							parent
							
								
									b92c5c8b8e
								
							
						
					
					
						commit
						25ca86c1eb
					
				| @ -0,0 +1,120 @@ | ||||
| From ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c Mon Sep 17 00:00:00 2001 | ||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||
| Date: Mon, 19 Feb 2018 17:26:33 +0000 | ||||
| Subject: [PATCH] Fix \C bug with repeated character classes in UTF-8 mode. | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069 | ||||
| Petr Písař: Ported to 10.31. | ||||
| ---
 | ||||
|  src/pcre2_match.c        | 16 ++++++++++++---- | ||||
|  testdata/testinput22     |  3 +++ | ||||
|  testdata/testoutput22-16 |  4 ++++ | ||||
|  testdata/testoutput22-32 |  4 ++++ | ||||
|  testdata/testoutput22-8  |  4 ++++ | ||||
| 
 | ||||
| diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 | ||||
| index 79cc93f..ce96016 100644
 | ||||
| --- a/src/pcre2_match.c
 | ||||
| +++ b/src/pcre2_match.c
 | ||||
| @@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||
|   | ||||
|            if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||
|   | ||||
| +          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
 | ||||
| +          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||
| +          go too far. */
 | ||||
| +
 | ||||
|            for (;;) | ||||
|              { | ||||
|              RMATCH(Fecode, RM201); | ||||
|              if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
| -            if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
 | ||||
| +            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 | ||||
|              BACKCHAR(Feptr); | ||||
|              } | ||||
|            } | ||||
| @@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||
|   | ||||
|          if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||
|   | ||||
| +        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
 | ||||
| +        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||
| +        go too far. */
 | ||||
| +
 | ||||
|          for(;;) | ||||
|            { | ||||
|            RMATCH(Fecode, RM101); | ||||
|            if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||
| -          if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
 | ||||
| +          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 | ||||
|  #ifdef SUPPORT_UNICODE | ||||
|            if (utf) BACKCHAR(Feptr); | ||||
|  #endif | ||||
| @@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||
|          if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||
|   | ||||
|          /* After \C in UTF mode, Lstart_eptr might be in the middle of a | ||||
| -        Unicode character. Use <= pp to ensure backtracking doesn't go too far.
 | ||||
| -        */
 | ||||
| +        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||
| +        go too far. */
 | ||||
|   | ||||
|          for(;;) | ||||
|            { | ||||
| diff --git a/testdata/testinput22 b/testdata/testinput22
 | ||||
| index e6d4053..c218ea6 100644
 | ||||
| --- a/testdata/testinput22
 | ||||
| +++ b/testdata/testinput22
 | ||||
| @@ -98,4 +98,7 @@
 | ||||
|  \= Expect no match - tests \C at end of subject | ||||
|      ab | ||||
|   | ||||
| +/\C[^\v]+\x80/utf
 | ||||
| +    [AΏBŀC]
 | ||||
| +
 | ||||
|  # End of testinput22 | ||||
| diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
 | ||||
| index 88f827c..5e23611 100644
 | ||||
| --- a/testdata/testoutput22-16
 | ||||
| +++ b/testdata/testoutput22-16
 | ||||
| @@ -171,4 +171,8 @@ No match
 | ||||
|      ab | ||||
|  No match | ||||
|   | ||||
| +/\C[^\v]+\x80/utf
 | ||||
| +    [AΏBŀC]
 | ||||
| +No match
 | ||||
| +
 | ||||
|  # End of testinput22 | ||||
| diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
 | ||||
| index ac485fc..8576f31 100644
 | ||||
| --- a/testdata/testoutput22-32
 | ||||
| +++ b/testdata/testoutput22-32
 | ||||
| @@ -169,4 +169,8 @@ No match
 | ||||
|      ab | ||||
|  No match | ||||
|   | ||||
| +/\C[^\v]+\x80/utf
 | ||||
| +    [AΏBŀC]
 | ||||
| +No match
 | ||||
| +
 | ||||
|  # End of testinput22 | ||||
| diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
 | ||||
| index 3d31fbc..8543652 100644
 | ||||
| --- a/testdata/testoutput22-8
 | ||||
| +++ b/testdata/testoutput22-8
 | ||||
| @@ -173,4 +173,8 @@ No match
 | ||||
|      ab | ||||
|  No match | ||||
|   | ||||
| +/\C[^\v]+\x80/utf
 | ||||
| +    [AΏBŀC]
 | ||||
| +No match
 | ||||
| +
 | ||||
|  # End of testinput22 | ||||
| -- 
 | ||||
| 2.13.6 | ||||
| 
 | ||||
| @ -57,6 +57,9 @@ Patch2:     pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch | ||||
| # 2/2 Fix pcre2test -C to correctly show what \R matches, | ||||
| # in upstream after 10.31 | ||||
| Patch3:     pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch | ||||
| # Fix matching repeated character classes against an 8-bit string containing | ||||
| # multi-code-unit characters, in upstream after 10.31 | ||||
| Patch4:     pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch | ||||
| BuildRequires:  autoconf | ||||
| BuildRequires:  automake | ||||
| BuildRequires:  coreutils | ||||
| @ -136,6 +139,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. | ||||
| %patch1 -p1 | ||||
| %patch2 -p1 | ||||
| %patch3 -p1 | ||||
| %patch4 -p1 | ||||
| # Because of multilib patch | ||||
| libtoolize --copy --force | ||||
| autoreconf -vif | ||||
| @ -242,6 +246,8 @@ make %{?_smp_mflags} check VERBOSE=yes | ||||
| - Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero | ||||
|   starting offset (upstream bug #2244) | ||||
| - Fix pcre2test -C to correctly show what \R matches | ||||
| - Fix matching repeated character classes against an 8-bit string containing | ||||
|   multi-code-unit characters | ||||
| 
 | ||||
| * Mon Feb 12 2018 Petr Pisar <ppisar@redhat.com> - 10.31-1 | ||||
| - 10.31 bump | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user