Fix matching at a first code unit of a new line sequence if PCRE2_FIRSTLINE is enabled
This commit is contained in:
		
							parent
							
								
									6d626f9a4d
								
							
						
					
					
						commit
						1c9da09ce5
					
				
							
								
								
									
										61
									
								
								pcre2-10.30-FIRSTLINE_documentation-update.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										61
									
								
								pcre2-10.30-FIRSTLINE_documentation-update.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,61 @@ | ||||
| From 3fdced6eef96f50ac5bd287426db0aa699be3edc Mon Sep 17 00:00:00 2001 | ||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||
| Date: Sun, 31 Dec 2017 17:44:12 +0000 | ||||
| Subject: [PATCH] Documentation update. | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@898 6239d852-aaf2-0410-a92c-79f79f948069 | ||||
| Petr Písař: Ported to 10.30. | ||||
| 
 | ||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> | ||||
| ---
 | ||||
|  doc/pcre2api.3 | 24 +++++++++++++++--------- | ||||
|  1 file changed, 15 insertions(+), 9 deletions(-) | ||||
| 
 | ||||
| diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
 | ||||
| index f80ae58..d55debf 100644
 | ||||
| --- a/doc/pcre2api.3
 | ||||
| +++ b/doc/pcre2api.3
 | ||||
| @@ -871,10 +871,11 @@ compiled. If a match is started with a non-default match limit when
 | ||||
|  PCRE2_USE_OFFSET_LIMIT is not set, an error is generated. | ||||
|  .P | ||||
|  The offset limit facility can be used to track progress when searching large | ||||
| -subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
 | ||||
| -start within the first line of the subject. If this is set with an offset
 | ||||
| -limit, a match must occur in the first line and also within the offset limit.
 | ||||
| -In other words, whichever limit comes first is used.
 | ||||
| +subject strings. See also the PCRE2_FIRSTLINE option, which requires a match
 | ||||
| +to start before or at the first newline that follows the start of matching in
 | ||||
| +the subject. If this is set with an offset limit, a match must occur in the
 | ||||
| +first line and also within the offset limit. In other words, whichever limit
 | ||||
| +comes first is used.
 | ||||
|  .sp | ||||
|  .nf | ||||
|  .B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP, | ||||
| @@ -1423,11 +1424,16 @@ changed within a pattern by a (?xx) option setting.
 | ||||
|    PCRE2_FIRSTLINE | ||||
|  .sp | ||||
|  If this option is set, the start of an unanchored pattern match must be before | ||||
| -or at the first newline in the subject string, though the matched text may
 | ||||
| -continue over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a
 | ||||
| -more general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit,
 | ||||
| -a match must occur in the first line and also within the offset limit. In other
 | ||||
| -words, whichever limit comes first is used.
 | ||||
| +or at the first newline in the subject string following the start of matching,
 | ||||
| +though the matched text may continue over the newline. If \fIstartoffset\fP is 
 | ||||
| +non-zero, the limiting newline is not necessarily the first newline in the 
 | ||||
| +subject. For example, if the subject string is "abc\enxyz" (where \en
 | ||||
| +represents a single-character newline) a pattern match for "yz" succeeds with
 | ||||
| +PCRE2_FIRSTLINE if \fIstartoffset\fP is greater than 3. See also
 | ||||
| +PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If
 | ||||
| +PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first
 | ||||
| +line and also within the offset limit. In other words, whichever limit comes
 | ||||
| +first is used.
 | ||||
|  .sp | ||||
|    PCRE2_LITERAL | ||||
|  .sp | ||||
| -- 
 | ||||
| 2.13.6 | ||||
| 
 | ||||
| @ -0,0 +1,206 @@ | ||||
| From f3b22988611cca57770a705f05c0d9ef583d605a Mon Sep 17 00:00:00 2001 | ||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||
| Date: Mon, 1 Jan 2018 14:12:35 +0000 | ||||
| Subject: [PATCH] Fix PCRE2_FIRSTLINE bug when a pattern match starts with the | ||||
|  first code unit of a newline sequence. | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@899 6239d852-aaf2-0410-a92c-79f79f948069 | ||||
| Petr Písař: Ported to 10.30 | ||||
| 
 | ||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> | ||||
| ---
 | ||||
|  src/pcre2_dfa_match.c | 21 +++++++++++++++------ | ||||
|  src/pcre2_match.c     | 19 ++++++++++++++----- | ||||
|  testdata/testinput2   | 10 ++++++++++ | ||||
|  testdata/testinput6   | 10 ++++++++++ | ||||
|  testdata/testoutput2  | 13 +++++++++++++ | ||||
|  testdata/testoutput6  | 13 +++++++++++++ | ||||
|  6 files changed, 75 insertions(+), 11 deletions(-) | ||||
| 
 | ||||
| diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
 | ||||
| index 5ae1394..7bbd6d3 100644
 | ||||
| --- a/src/pcre2_dfa_match.c
 | ||||
| +++ b/src/pcre2_dfa_match.c
 | ||||
| @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 | ||||
|   | ||||
|                         Written by Philip Hazel | ||||
|       Original API code Copyright (c) 1997-2012 University of Cambridge | ||||
| -          New API code Copyright (c) 2016-2017 University of Cambridge
 | ||||
| +          New API code Copyright (c) 2016-2018 University of Cambridge
 | ||||
|   | ||||
|  ----------------------------------------------------------------------------- | ||||
|  Redistribution and use in source and binary forms, with or without | ||||
| @@ -3558,9 +3558,11 @@ for (;;)
 | ||||
|   | ||||
|      /* If firstline is TRUE, the start of the match is constrained to the first | ||||
|      line of a multiline string. That is, the match must be before or at the | ||||
| -    first newline. Implement this by temporarily adjusting end_subject so that
 | ||||
| -    we stop the optimization scans for a first code unit at a newline. If the
 | ||||
| -    match fails at the newline, later code breaks this loop. */
 | ||||
| +    first newline following the start of matching. Temporarily adjust
 | ||||
| +    end_subject so that we stop the optimization scans for a first code unit
 | ||||
| +    immediately after the first character of a newline (the first code unit can
 | ||||
| +    legitimately be a newline). If the match fails at the newline, later code
 | ||||
| +    breaks this loop. */
 | ||||
|   | ||||
|      if (firstline) | ||||
|        { | ||||
| @@ -3568,7 +3570,7 @@ for (;;)
 | ||||
|  #ifdef SUPPORT_UNICODE | ||||
|        if (utf) | ||||
|          { | ||||
| -        while (t < mb->end_subject && !IS_NEWLINE(t))
 | ||||
| +        while (t < end_subject && !IS_NEWLINE(t))
 | ||||
|            { | ||||
|            t++; | ||||
|            ACROSSCHAR(t < end_subject, *t, t++); | ||||
| @@ -3576,7 +3578,14 @@ for (;;)
 | ||||
|          } | ||||
|        else | ||||
|  #endif | ||||
| -      while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
 | ||||
| +      while (t < end_subject && !IS_NEWLINE(t)) t++;
 | ||||
| +
 | ||||
| +      /* Note that we only need to advance by one code unit if we found a
 | ||||
| +      newline. If the newline is CRLF, a first code unit of LF should not
 | ||||
| +      match, because it is not at or before the newline. Similarly, only the
 | ||||
| +      first code unit of a Unicode newline might be relevant. */
 | ||||
| +
 | ||||
| +      if (t < end_subject) t++;
 | ||||
|        end_subject = t; | ||||
|        } | ||||
|   | ||||
| diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 | ||||
| index 050b7e9..8afb0d8 100644
 | ||||
| --- a/src/pcre2_match.c
 | ||||
| +++ b/src/pcre2_match.c
 | ||||
| @@ -6541,9 +6541,11 @@ for(;;)
 | ||||
|   | ||||
|      /* If firstline is TRUE, the start of the match is constrained to the first | ||||
|      line of a multiline string. That is, the match must be before or at the | ||||
| -    first newline. Implement this by temporarily adjusting end_subject so that
 | ||||
| -    we stop the optimization scans for a first code unit at a newline. If the
 | ||||
| -    match fails at the newline, later code breaks this loop. */
 | ||||
| +    first newline following the start of matching. Temporarily adjust
 | ||||
| +    end_subject so that we stop the optimization scans for a first code unit
 | ||||
| +    immediately after the first character of a newline (the first code unit can
 | ||||
| +    legitimately be a newline). If the match fails at the newline, later code
 | ||||
| +    breaks this loop. */
 | ||||
|   | ||||
|      if (firstline) | ||||
|        { | ||||
| @@ -6551,7 +6553,7 @@ for(;;)
 | ||||
|  #ifdef SUPPORT_UNICODE | ||||
|        if (utf) | ||||
|          { | ||||
| -        while (t < mb->end_subject && !IS_NEWLINE(t))
 | ||||
| +        while (t < end_subject && !IS_NEWLINE(t))
 | ||||
|            { | ||||
|            t++; | ||||
|            ACROSSCHAR(t < end_subject, *t, t++); | ||||
| @@ -6559,7 +6561,14 @@ for(;;)
 | ||||
|          } | ||||
|        else | ||||
|  #endif | ||||
| -      while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
 | ||||
| +      while (t < end_subject && !IS_NEWLINE(t)) t++;
 | ||||
| +
 | ||||
| +      /* Note that we only need to advance by one code unit if we found a
 | ||||
| +      newline. If the newline is CRLF, a first code unit of LF should not
 | ||||
| +      match, because it is not at or before the newline. Similarly, only the
 | ||||
| +      first code unit of a Unicode newline might be relevant. */
 | ||||
| +
 | ||||
| +      if (t < end_subject) t++;
 | ||||
|        end_subject = t; | ||||
|        } | ||||
|   | ||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 | ||||
| index 695f0a4..b173fe0 100644
 | ||||
| --- a/testdata/testinput2
 | ||||
| +++ b/testdata/testinput2
 | ||||
| @@ -5385,4 +5385,14 @@ a)"xI
 | ||||
|      ab | ||||
|      aaab  | ||||
|   | ||||
| +/\n/firstline
 | ||||
| +    xyz\nabc
 | ||||
| +
 | ||||
| +/\nabc/firstline
 | ||||
| +    xyz\nabc
 | ||||
| +
 | ||||
| +/\x{0a}abc/firstline,newline=crlf
 | ||||
| +\= Expect no match
 | ||||
| +    xyz\r\nabc
 | ||||
| +
 | ||||
|  # End of testinput2 | ||||
| diff --git a/testdata/testinput6 b/testdata/testinput6
 | ||||
| index ce2e082..614c3a0 100644
 | ||||
| --- a/testdata/testinput6
 | ||||
| +++ b/testdata/testinput6
 | ||||
| @@ -4932,4 +4932,14 @@
 | ||||
|  /(*LIMIT_MATCH=100).*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););/no_dotstar_anchor | ||||
|  .*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); | ||||
|   | ||||
| +/\n/firstline
 | ||||
| +    xyz\nabc
 | ||||
| +
 | ||||
| +/\nabc/firstline
 | ||||
| +    xyz\nabc
 | ||||
| +
 | ||||
| +/\x{0a}abc/firstline,newline=crlf
 | ||||
| +\= Expect no match
 | ||||
| +    xyz\r\nabc
 | ||||
| +
 | ||||
|  # End of testinput6 | ||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 | ||||
| index 31ccfbe..c19c270 100644
 | ||||
| --- a/testdata/testoutput2
 | ||||
| +++ b/testdata/testoutput2
 | ||||
| @@ -16368,6 +16368,19 @@ Subject length lower bound = 1
 | ||||
|   0: ab | ||||
|   1: a | ||||
|   | ||||
| +/\n/firstline
 | ||||
| +    xyz\nabc
 | ||||
| + 0: \x0a
 | ||||
| +
 | ||||
| +/\nabc/firstline
 | ||||
| +    xyz\nabc
 | ||||
| + 0: \x0aabc
 | ||||
| +
 | ||||
| +/\x{0a}abc/firstline,newline=crlf
 | ||||
| +\= Expect no match
 | ||||
| +    xyz\r\nabc
 | ||||
| +No match
 | ||||
| +
 | ||||
|  # End of testinput2 | ||||
|  Error -65: PCRE2_ERROR_BADDATA (unknown error number) | ||||
|  Error -62: bad serialized data | ||||
| diff --git a/testdata/testoutput6 b/testdata/testoutput6
 | ||||
| index b912944..2d321d5 100644
 | ||||
| --- a/testdata/testoutput6
 | ||||
| +++ b/testdata/testoutput6
 | ||||
| @@ -7753,4 +7753,17 @@ No match
 | ||||
|  .*(?![|H]?.*(?![|H]?););.*(?![|H]?.*(?![|H]?););\x00\x00\x00\x00\x00\x00\x00(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?!(?![|);)?.*(![|H]?);)?.*(?![|H]?);)?.*(?![|H]?);)?.*(?![|H]););![|H]?););[|H]?);|H]?);)\x00\x00\x00\x00\x00\x00H]?););?![|H]?);)?.*(?![|H]?););[||H]?);)?.*(?![|H]?););[|H]?);(?![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?););;[\x00\x00\x00\x00\x00\x00\x00![|H]?););![|H]?););[|H]?);|H]?);)?.*(?![|H]?);); | ||||
|  Failed: error -47: match limit exceeded | ||||
|   | ||||
| +/\n/firstline
 | ||||
| +    xyz\nabc
 | ||||
| + 0: \x0a
 | ||||
| +
 | ||||
| +/\nabc/firstline
 | ||||
| +    xyz\nabc
 | ||||
| + 0: \x0aabc
 | ||||
| +
 | ||||
| +/\x{0a}abc/firstline,newline=crlf
 | ||||
| +\= Expect no match
 | ||||
| +    xyz\r\nabc
 | ||||
| +No match
 | ||||
| +
 | ||||
|  # End of testinput6 | ||||
| -- 
 | ||||
| 2.13.6 | ||||
| 
 | ||||
							
								
								
									
										218
									
								
								pcre2-10.30-Previous-FIRSTLINE-patch-was-broken.-Fix-it.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										218
									
								
								pcre2-10.30-Previous-FIRSTLINE-patch-was-broken.-Fix-it.patch
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,218 @@ | ||||
| From 1b5d77c6edc5ee8e8fe5c96bf9cad5798d6ce36c Mon Sep 17 00:00:00 2001 | ||||
| From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | ||||
| Date: Mon, 1 Jan 2018 14:54:06 +0000 | ||||
| Subject: [PATCH 3/3] Previous FIRSTLINE patch was broken. Fix it. | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
| 
 | ||||
| git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@900 6239d852-aaf2-0410-a92c-79f79f948069 | ||||
| Signed-off-by: Petr Písař <ppisar@redhat.com> | ||||
| ---
 | ||||
|  src/pcre2_dfa_match.c | 27 +++++++++++---------------- | ||||
|  src/pcre2_match.c     | 37 +++++++++++++++---------------------- | ||||
|  testdata/testinput2   |  4 ++++ | ||||
|  testdata/testinput6   |  4 ++++ | ||||
|  testdata/testoutput2  |  5 +++++ | ||||
|  testdata/testoutput6  |  5 +++++ | ||||
|  6 files changed, 44 insertions(+), 38 deletions(-) | ||||
| 
 | ||||
| diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
 | ||||
| index 9c1d805..65243bf 100644
 | ||||
| --- a/src/pcre2_dfa_match.c
 | ||||
| +++ b/src/pcre2_dfa_match.c
 | ||||
| @@ -3363,8 +3363,6 @@ for (;;)
 | ||||
|    if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && | ||||
|        (options & PCRE2_DFA_RESTART) == 0) | ||||
|      { | ||||
| -    PCRE2_SPTR save_end_subject = end_subject;
 | ||||
| -
 | ||||
|      /* If firstline is TRUE, the start of the match is constrained to the first | ||||
|      line of a multiline string. That is, the match must be before or at the | ||||
|      first newline following the start of matching. Temporarily adjust | ||||
| @@ -3388,13 +3386,6 @@ for (;;)
 | ||||
|        else | ||||
|  #endif | ||||
|        while (t < end_subject && !IS_NEWLINE(t)) t++; | ||||
| -
 | ||||
| -      /* Note that we only need to advance by one code unit if we found a
 | ||||
| -      newline. If the newline is CRLF, a first code unit of LF should not
 | ||||
| -      match, because it is not at or before the newline. Similarly, only the
 | ||||
| -      first code unit of a Unicode newline might be relevant. */
 | ||||
| -
 | ||||
| -      if (t < end_subject) t++;
 | ||||
|        end_subject = t; | ||||
|        } | ||||
|   | ||||
| @@ -3466,14 +3457,18 @@ for (;;)
 | ||||
|  #endif | ||||
|            } | ||||
|   | ||||
| -        /* If we can't find the required code unit, break the bumpalong loop,
 | ||||
| -        to force a match failure, except when doing partial matching, when we
 | ||||
| -        let the next cycle run at the end of the subject. To see why, consider
 | ||||
| -        the pattern /(?<=abc)def/, which partially matches "abc", even though
 | ||||
| -        the string does not contain the starting character "d". */
 | ||||
| +        /* If we can't find the required code unit, having reached the true end
 | ||||
| +        of the subject, break the bumpalong loop, to force a match failure,
 | ||||
| +        except when doing partial matching, when we let the next cycle run at
 | ||||
| +        the end of the subject. To see why, consider the pattern /(?<=abc)def/,
 | ||||
| +        which partially matches "abc", even though the string does not contain
 | ||||
| +        the starting character "d". If we have not reached the true end of the
 | ||||
| +        subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
 | ||||
| +        we also let the cycle run, because the matching string is legitimately
 | ||||
| +        allowed to start with the first code unit of a newline. */
 | ||||
|   | ||||
|          if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 && | ||||
| -            start_match >= end_subject)
 | ||||
| +            start_match >= mb->end_subject)
 | ||||
|            break; | ||||
|          } | ||||
|   | ||||
| @@ -3532,7 +3527,7 @@ for (;;)
 | ||||
|   | ||||
|      /* Restore fudged end_subject */ | ||||
|   | ||||
| -    end_subject = save_end_subject;
 | ||||
| +    end_subject = mb->end_subject;
 | ||||
|   | ||||
|      /* The following two optimizations are disabled for partial matching. */ | ||||
|   | ||||
| diff --git a/src/pcre2_match.c b/src/pcre2_match.c
 | ||||
| index 8872345..c6b6975 100644
 | ||||
| --- a/src/pcre2_match.c
 | ||||
| +++ b/src/pcre2_match.c
 | ||||
| @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 | ||||
|   | ||||
|                         Written by Philip Hazel | ||||
|       Original API code Copyright (c) 1997-2012 University of Cambridge | ||||
| -          New API code Copyright (c) 2015-2017 University of Cambridge
 | ||||
| +          New API code Copyright (c) 2015-2018 University of Cambridge
 | ||||
|   | ||||
|  ----------------------------------------------------------------------------- | ||||
|  Redistribution and use in source and binary forms, with or without | ||||
| @@ -6363,15 +6363,11 @@ for(;;)
 | ||||
|   | ||||
|    if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) | ||||
|      { | ||||
| -    PCRE2_SPTR save_end_subject = end_subject;
 | ||||
| -
 | ||||
|      /* If firstline is TRUE, the start of the match is constrained to the first | ||||
|      line of a multiline string. That is, the match must be before or at the | ||||
|      first newline following the start of matching. Temporarily adjust | ||||
| -    end_subject so that we stop the optimization scans for a first code unit
 | ||||
| -    immediately after the first character of a newline (the first code unit can
 | ||||
| -    legitimately be a newline). If the match fails at the newline, later code
 | ||||
| -    breaks this loop. */
 | ||||
| +    end_subject so that we stop the scans for a first code unit at a newline.
 | ||||
| +    If the match fails at the newline, later code breaks the loop. */
 | ||||
|   | ||||
|      if (firstline) | ||||
|        { | ||||
| @@ -6388,13 +6384,6 @@ for(;;)
 | ||||
|        else | ||||
|  #endif | ||||
|        while (t < end_subject && !IS_NEWLINE(t)) t++; | ||||
| -
 | ||||
| -      /* Note that we only need to advance by one code unit if we found a
 | ||||
| -      newline. If the newline is CRLF, a first code unit of LF should not
 | ||||
| -      match, because it is not at or before the newline. Similarly, only the
 | ||||
| -      first code unit of a Unicode newline might be relevant. */
 | ||||
| -
 | ||||
| -      if (t < end_subject) t++;
 | ||||
|        end_subject = t; | ||||
|        } | ||||
|   | ||||
| @@ -6470,13 +6459,17 @@ for(;;)
 | ||||
|  #endif | ||||
|            } | ||||
|   | ||||
| -        /* If we can't find the required code unit, break the bumpalong loop,
 | ||||
| -        to force a match failure, except when doing partial matching, when we
 | ||||
| -        let the next cycle run at the end of the subject. To see why, consider
 | ||||
| -        the pattern /(?<=abc)def/, which partially matches "abc", even though
 | ||||
| -        the string does not contain the starting character "d". */
 | ||||
| -
 | ||||
| -        if (!mb->partial && start_match >= end_subject)
 | ||||
| +        /* If we can't find the required code unit, having reached the true end
 | ||||
| +        of the subject, break the bumpalong loop, to force a match failure,
 | ||||
| +        except when doing partial matching, when we let the next cycle run at
 | ||||
| +        the end of the subject. To see why, consider the pattern /(?<=abc)def/,
 | ||||
| +        which partially matches "abc", even though the string does not contain
 | ||||
| +        the starting character "d". If we have not reached the true end of the
 | ||||
| +        subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
 | ||||
| +        we also let the cycle run, because the matching string is legitimately
 | ||||
| +        allowed to start with the first code unit of a newline. */
 | ||||
| +
 | ||||
| +        if (!mb->partial && start_match >= mb->end_subject)
 | ||||
|            { | ||||
|            rc = MATCH_NOMATCH; | ||||
|            break; | ||||
| @@ -6538,7 +6531,7 @@ for(;;)
 | ||||
|   | ||||
|      /* Restore fudged end_subject */ | ||||
|   | ||||
| -    end_subject = save_end_subject;
 | ||||
| +    end_subject = mb->end_subject;
 | ||||
|   | ||||
|      /* The following two optimizations must be disabled for partial matching. */ | ||||
|   | ||||
| diff --git a/testdata/testinput2 b/testdata/testinput2
 | ||||
| index fe8efbf..36e4454 100644
 | ||||
| --- a/testdata/testinput2
 | ||||
| +++ b/testdata/testinput2
 | ||||
| @@ -5405,4 +5405,8 @@ a)"xI
 | ||||
|  \= Expect no match | ||||
|      xyz\r\nabc | ||||
|   | ||||
| +/[abc]/firstline
 | ||||
| +\= Expect no match
 | ||||
| +    \na
 | ||||
| +    
 | ||||
|  # End of testinput2 | ||||
| diff --git a/testdata/testinput6 b/testdata/testinput6
 | ||||
| index 614c3a0..e2f00c0 100644
 | ||||
| --- a/testdata/testinput6
 | ||||
| +++ b/testdata/testinput6
 | ||||
| @@ -4942,4 +4942,8 @@
 | ||||
|  \= Expect no match | ||||
|      xyz\r\nabc | ||||
|   | ||||
| +/[abc]/firstline
 | ||||
| +\= Expect no match
 | ||||
| +    \na
 | ||||
| +    
 | ||||
|  # End of testinput6 | ||||
| diff --git a/testdata/testoutput2 b/testdata/testoutput2
 | ||||
| index 62ec12f..f146c0c 100644
 | ||||
| --- a/testdata/testoutput2
 | ||||
| +++ b/testdata/testoutput2
 | ||||
| @@ -16453,6 +16453,11 @@ No match
 | ||||
|      xyz\r\nabc | ||||
|  No match | ||||
|   | ||||
| +/[abc]/firstline
 | ||||
| +\= Expect no match
 | ||||
| +    \na
 | ||||
| +No match
 | ||||
| +    
 | ||||
|  # End of testinput2 | ||||
|  Error -65: PCRE2_ERROR_BADDATA (unknown error number) | ||||
|  Error -62: bad serialized data | ||||
| diff --git a/testdata/testoutput6 b/testdata/testoutput6
 | ||||
| index 998f20b..b409fe0 100644
 | ||||
| --- a/testdata/testoutput6
 | ||||
| +++ b/testdata/testoutput6
 | ||||
| @@ -7766,4 +7766,9 @@ Failed: error -47: match limit exceeded
 | ||||
|      xyz\r\nabc | ||||
|  No match | ||||
|   | ||||
| +/[abc]/firstline
 | ||||
| +\= Expect no match
 | ||||
| +    \na
 | ||||
| +No match
 | ||||
| +    
 | ||||
|  # End of testinput6 | ||||
| -- 
 | ||||
| 2.13.6 | ||||
| 
 | ||||
							
								
								
									
										14
									
								
								pcre2.spec
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								pcre2.spec
									
									
									
									
									
								
							| @ -71,6 +71,15 @@ Patch9:     pcre2-10.30-Documentation-update.patch | ||||
| # Fix handling \K in an assertion in pcre2grep tool, upstream bug #2211, | ||||
| # in upstream after 10.30 | ||||
| Patch10:    pcre2-10.30-Fix-K-issues-in-pcre2grep.patch | ||||
| # 1/3 Fix matching at a first code unit of a new line sequence if | ||||
| # PCRE2_FIRSTLINE is enabled, in upstream after 10.30 | ||||
| Patch11:    pcre2-10.30-FIRSTLINE_documentation-update.patch | ||||
| # 2/3 Fix matching at a first code unit of a new line sequence if | ||||
| # PCRE2_FIRSTLINE is enabled, in upstream after 10.30 | ||||
| Patch12:    pcre2-10.30-Fix-PCRE2_FIRSTLINE-bug-when-a-pattern-match-starts-.patch | ||||
| # 3/3 Fix matching at a first code unit of a new line sequence if | ||||
| # PCRE2_FIRSTLINE is enabled, in upstream after 10.30 | ||||
| Patch13:    pcre2-10.30-Previous-FIRSTLINE-patch-was-broken.-Fix-it.patch | ||||
| BuildRequires:  autoconf | ||||
| BuildRequires:  automake | ||||
| BuildRequires:  coreutils | ||||
| @ -155,6 +164,9 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. | ||||
| %patch8 -p1 | ||||
| %patch9 -p1 | ||||
| %patch10 -p1 | ||||
| %patch11 -p1 | ||||
| %patch12 -p1 | ||||
| %patch13 -p1 | ||||
| # Because of multilib patch | ||||
| libtoolize --copy --force | ||||
| autoreconf -vif | ||||
| @ -261,6 +273,8 @@ make %{?_smp_mflags} check VERBOSE=yes | ||||
| * Fri Jan 12 2018 Petr Pisar <ppisar@redhat.com> - 10.30-5 | ||||
| - Fix handling \K in an assertion in pcre2grep tool and documentation | ||||
|   (upstream bug #2211) | ||||
| - Fix matching at a first code unit of a new line sequence if PCRE2_FIRSTLINE | ||||
|   is enabled | ||||
| 
 | ||||
| * Fri Dec 22 2017 Petr Pisar <ppisar@redhat.com> - 10.30-4 | ||||
| - Fix pcre2_jit_match() to properly check the pattern was JIT-compiled | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user