Fix matching repeated character classes against an 8-bit string containing multi-code-unit characters
This commit is contained in:
		
							parent
							
								
									b92c5c8b8e
								
							
						
					
					
						commit
						25ca86c1eb
					
				| @ -0,0 +1,120 @@ | |||||||
|  | From ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c Mon Sep 17 00:00:00 2001 | ||||||
|  | From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||||
|  | Date: Mon, 19 Feb 2018 17:26:33 +0000 | ||||||
|  | Subject: [PATCH] Fix \C bug with repeated character classes in UTF-8 mode. | ||||||
|  | MIME-Version: 1.0 | ||||||
|  | Content-Type: text/plain; charset=UTF-8 | ||||||
|  | Content-Transfer-Encoding: 8bit | ||||||
|  | 
 | ||||||
|  | git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069 | ||||||
|  | Petr Písař: Ported to 10.31. | ||||||
|  | ---
 | ||||||
|  |  src/pcre2_match.c        | 16 ++++++++++++---- | ||||||
|  |  testdata/testinput22     |  3 +++ | ||||||
|  |  testdata/testoutput22-16 |  4 ++++ | ||||||
|  |  testdata/testoutput22-32 |  4 ++++ | ||||||
|  |  testdata/testoutput22-8  |  4 ++++ | ||||||
|  | 
 | ||||||
|  | diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 | ||||||
|  | index 79cc93f..ce96016 100644
 | ||||||
|  | --- a/src/pcre2_match.c
 | ||||||
|  | +++ b/src/pcre2_match.c
 | ||||||
|  | @@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||||
|  |   | ||||||
|  |            if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||||
|  |   | ||||||
|  | +          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
 | ||||||
|  | +          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||||
|  | +          go too far. */
 | ||||||
|  | +
 | ||||||
|  |            for (;;) | ||||||
|  |              { | ||||||
|  |              RMATCH(Fecode, RM201); | ||||||
|  |              if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||||
|  | -            if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
 | ||||||
|  | +            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 | ||||||
|  |              BACKCHAR(Feptr); | ||||||
|  |              } | ||||||
|  |            } | ||||||
|  | @@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||||
|  |   | ||||||
|  |          if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||||
|  |   | ||||||
|  | +        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
 | ||||||
|  | +        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||||
|  | +        go too far. */
 | ||||||
|  | +
 | ||||||
|  |          for(;;) | ||||||
|  |            { | ||||||
|  |            RMATCH(Fecode, RM101); | ||||||
|  |            if (rrc != MATCH_NOMATCH) RRETURN(rrc); | ||||||
|  | -          if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
 | ||||||
|  | +          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 | ||||||
|  |  #ifdef SUPPORT_UNICODE | ||||||
|  |            if (utf) BACKCHAR(Feptr); | ||||||
|  |  #endif | ||||||
|  | @@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 | ||||||
|  |          if (reptype == REPTYPE_POS) continue;    /* No backtracking */ | ||||||
|  |   | ||||||
|  |          /* After \C in UTF mode, Lstart_eptr might be in the middle of a | ||||||
|  | -        Unicode character. Use <= pp to ensure backtracking doesn't go too far.
 | ||||||
|  | -        */
 | ||||||
|  | +        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
 | ||||||
|  | +        go too far. */
 | ||||||
|  |   | ||||||
|  |          for(;;) | ||||||
|  |            { | ||||||
|  | diff --git a/testdata/testinput22 b/testdata/testinput22
 | ||||||
|  | index e6d4053..c218ea6 100644
 | ||||||
|  | --- a/testdata/testinput22
 | ||||||
|  | +++ b/testdata/testinput22
 | ||||||
|  | @@ -98,4 +98,7 @@
 | ||||||
|  |  \= Expect no match - tests \C at end of subject | ||||||
|  |      ab | ||||||
|  |   | ||||||
|  | +/\C[^\v]+\x80/utf
 | ||||||
|  | +    [AΏBŀC]
 | ||||||
|  | +
 | ||||||
|  |  # End of testinput22 | ||||||
|  | diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
 | ||||||
|  | index 88f827c..5e23611 100644
 | ||||||
|  | --- a/testdata/testoutput22-16
 | ||||||
|  | +++ b/testdata/testoutput22-16
 | ||||||
|  | @@ -171,4 +171,8 @@ No match
 | ||||||
|  |      ab | ||||||
|  |  No match | ||||||
|  |   | ||||||
|  | +/\C[^\v]+\x80/utf
 | ||||||
|  | +    [AΏBŀC]
 | ||||||
|  | +No match
 | ||||||
|  | +
 | ||||||
|  |  # End of testinput22 | ||||||
|  | diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
 | ||||||
|  | index ac485fc..8576f31 100644
 | ||||||
|  | --- a/testdata/testoutput22-32
 | ||||||
|  | +++ b/testdata/testoutput22-32
 | ||||||
|  | @@ -169,4 +169,8 @@ No match
 | ||||||
|  |      ab | ||||||
|  |  No match | ||||||
|  |   | ||||||
|  | +/\C[^\v]+\x80/utf
 | ||||||
|  | +    [AΏBŀC]
 | ||||||
|  | +No match
 | ||||||
|  | +
 | ||||||
|  |  # End of testinput22 | ||||||
|  | diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
 | ||||||
|  | index 3d31fbc..8543652 100644
 | ||||||
|  | --- a/testdata/testoutput22-8
 | ||||||
|  | +++ b/testdata/testoutput22-8
 | ||||||
|  | @@ -173,4 +173,8 @@ No match
 | ||||||
|  |      ab | ||||||
|  |  No match | ||||||
|  |   | ||||||
|  | +/\C[^\v]+\x80/utf
 | ||||||
|  | +    [AΏBŀC]
 | ||||||
|  | +No match
 | ||||||
|  | +
 | ||||||
|  |  # End of testinput22 | ||||||
|  | -- 
 | ||||||
|  | 2.13.6 | ||||||
|  | 
 | ||||||
| @ -57,6 +57,9 @@ Patch2:     pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch | |||||||
| # 2/2 Fix pcre2test -C to correctly show what \R matches, | # 2/2 Fix pcre2test -C to correctly show what \R matches, | ||||||
| # in upstream after 10.31 | # in upstream after 10.31 | ||||||
| Patch3:     pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch | Patch3:     pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch | ||||||
|  | # Fix matching repeated character classes against an 8-bit string containing | ||||||
|  | # multi-code-unit characters, in upstream after 10.31 | ||||||
|  | Patch4:     pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch | ||||||
| BuildRequires:  autoconf | BuildRequires:  autoconf | ||||||
| BuildRequires:  automake | BuildRequires:  automake | ||||||
| BuildRequires:  coreutils | BuildRequires:  coreutils | ||||||
| @ -136,6 +139,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. | |||||||
| %patch1 -p1 | %patch1 -p1 | ||||||
| %patch2 -p1 | %patch2 -p1 | ||||||
| %patch3 -p1 | %patch3 -p1 | ||||||
|  | %patch4 -p1 | ||||||
| # Because of multilib patch | # Because of multilib patch | ||||||
| libtoolize --copy --force | libtoolize --copy --force | ||||||
| autoreconf -vif | autoreconf -vif | ||||||
| @ -242,6 +246,8 @@ make %{?_smp_mflags} check VERBOSE=yes | |||||||
| - Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero | - Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero | ||||||
|   starting offset (upstream bug #2244) |   starting offset (upstream bug #2244) | ||||||
| - Fix pcre2test -C to correctly show what \R matches | - Fix pcre2test -C to correctly show what \R matches | ||||||
|  | - Fix matching repeated character classes against an 8-bit string containing | ||||||
|  |   multi-code-unit characters | ||||||
| 
 | 
 | ||||||
| * Mon Feb 12 2018 Petr Pisar <ppisar@redhat.com> - 10.31-1 | * Mon Feb 12 2018 Petr Pisar <ppisar@redhat.com> - 10.31-1 | ||||||
| - 10.31 bump | - 10.31 bump | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user