diff --git a/pcre2-10.30-Documentation-update.patch b/pcre2-10.30-Documentation-update.patch new file mode 100644 index 0000000..fa646cc --- /dev/null +++ b/pcre2-10.30-Documentation-update.patch @@ -0,0 +1,174 @@ +From c26d49eda45dd8a26e1de65a4430e84116266227 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sun, 24 Dec 2017 10:27:13 +0000 +Subject: [PATCH 2/3] Documentation update. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@895 6239d852-aaf2-0410-a92c-79f79f948069 +Signed-off-by: Petr Písař +--- + doc/html/pcre2demo.html | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ + doc/pcre2demo.3 | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 106 insertions(+) + +diff --git a/doc/html/pcre2demo.html b/doc/html/pcre2demo.html +index d64e16b..72754d3 100644 +--- a/doc/html/pcre2demo.html ++++ b/doc/html/pcre2demo.html +@@ -228,6 +228,21 @@ pcre2_match_data_create_from_pattern() above. */ + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + ++/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion ++to set the start of a match later than its end. In this demonstration program, ++we just detect this case and give up. */ ++ ++if (ovector[0] > ovector[1]) ++ { ++ printf("\\K was used in an assertion to set the match start after its end.\n" ++ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\n"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* Show substrings stored in the output vector by number. Obviously, in a real + application you might want to do things other than print them. */ + +@@ -355,6 +370,29 @@ for (;;) + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + ++ /* If the previous match was not an empty string, there is one tricky case to ++ consider. 
If a pattern contains \K within a lookbehind assertion at the ++ start, the end of the matched string can be at the offset where the match ++ started. Without special action, this leads to a loop that keeps on matching ++ the same substring. We must detect this case and arrange to move the start on ++ by one character. The pcre2_get_startchar() function returns the starting ++ offset that was passed to pcre2_match(). */ ++ ++ else ++ { ++ PCRE2_SIZE startchar = pcre2_get_startchar(match_data); ++ if (start_offset <= startchar) ++ { ++ if (startchar >= subject_length) break; /* Reached end of subject. */ ++ start_offset = startchar + 1; /* Advance by one character. */ ++ if (utf8) /* If UTF-8, it may be more */ ++ { /* than one code unit. */ ++ for (; start_offset < subject_length; start_offset++) ++ if ((subject[start_offset] & 0xc0) != 0x80) break; ++ } ++ } ++ } ++ + /* Run the next matching operation */ + + rc = pcre2_match( +@@ -419,6 +457,21 @@ for (;;) + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + ++ /* We must guard against patterns such as /(?=.\K)/ that use \K in an ++ assertion to set the start of a match later than its end. In this ++ demonstration program, we just detect this case and give up. */ ++ ++ if (ovector[0] > ovector[1]) ++ { ++ printf("\\K was used in an assertion to set the match start after its end.\n" ++ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\n"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. */ + +diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3 +index c02dcd9..a9e58e2 100644 +--- a/doc/pcre2demo.3 ++++ b/doc/pcre2demo.3 +@@ -228,6 +228,21 @@ pcre2_match_data_create_from_pattern() above. 
*/ + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + ++/* We must guard against patterns such as /(?=.\eK)/ that use \eK in an assertion ++to set the start of a match later than its end. In this demonstration program, ++we just detect this case and give up. */ ++ ++if (ovector[0] > ovector[1]) ++ { ++ printf("\e\eK was used in an assertion to set the match start after its end.\en" ++ "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\en"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* Show substrings stored in the output vector by number. Obviously, in a real + application you might want to do things other than print them. */ + +@@ -355,6 +370,29 @@ for (;;) + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + ++ /* If the previous match was not an empty string, there is one tricky case to ++ consider. If a pattern contains \eK within a lookbehind assertion at the ++ start, the end of the matched string can be at the offset where the match ++ started. Without special action, this leads to a loop that keeps on matching ++ the same substring. We must detect this case and arrange to move the start on ++ by one character. The pcre2_get_startchar() function returns the starting ++ offset that was passed to pcre2_match(). */ ++ ++ else ++ { ++ PCRE2_SIZE startchar = pcre2_get_startchar(match_data); ++ if (start_offset <= startchar) ++ { ++ if (startchar >= subject_length) break; /* Reached end of subject. */ ++ start_offset = startchar + 1; /* Advance by one character. */ ++ if (utf8) /* If UTF-8, it may be more */ ++ { /* than one code unit. 
*/ ++ for (; start_offset < subject_length; start_offset++) ++ if ((subject[start_offset] & 0xc0) != 0x80) break; ++ } ++ } ++ } ++ + /* Run the next matching operation */ + + rc = pcre2_match( +@@ -419,6 +457,21 @@ for (;;) + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\en"); + ++ /* We must guard against patterns such as /(?=.\eK)/ that use \eK in an ++ assertion to set the start of a match later than its end. In this ++ demonstration program, we just detect this case and give up. */ ++ ++ if (ovector[0] > ovector[1]) ++ { ++ printf("\e\eK was used in an assertion to set the match start after its end.\en" ++ "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\en"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. */ + +-- +2.13.6 + diff --git a/pcre2-10.30-Fix-K-issues-in-pcre2grep.patch b/pcre2-10.30-Fix-K-issues-in-pcre2grep.patch new file mode 100644 index 0000000..3100386 --- /dev/null +++ b/pcre2-10.30-Fix-K-issues-in-pcre2grep.patch @@ -0,0 +1,149 @@ +From a109c9e35a040a7a8032c12ce7396bc949f3f735 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Tue, 26 Dec 2017 15:10:04 +0000 +Subject: [PATCH 3/3] Fix \K issues in pcre2grep. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@896 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Ported to 10.30. +--- + RunGrepTest | 11 +++++++++++ + src/pcre2grep.c | 49 ++++++++++++++++++++++++++++++++++++++++++++----- + testdata/grepoutput | 9 +++++++++ + +diff --git a/RunGrepTest b/RunGrepTest +index a7496cb..a26f677 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -630,6 +630,17 @@ echo "RC=$?" 
>>testtrygrep + (cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep + echo "RC=$?" >>testtrygrep + ++echo "---------------------------- Test 125 -----------------------------" >>testtrygrep ++printf "abcd\n" >testNinputgrep ++$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep ++echo "RC=$?" >>testtrygrep ++$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep ++echo "RC=$?" >>testtrygrep ++$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep ++echo "RC=$?" >>testtrygrep ++$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ + + # Now compare the results. + +diff --git a/src/pcre2grep.c b/src/pcre2grep.c +index e764313..02339f5 100644 +--- a/src/pcre2grep.c ++++ b/src/pcre2grep.c +@@ -1607,7 +1607,7 @@ Returns: nothing + */ + + static void +-do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, ++do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, + char *endptr, const char *printname) + { + if (after_context > 0 && lastmatchnumber > 0) +@@ -2764,11 +2764,38 @@ while (ptr < endptr) + if ((multiline || do_colour) && !invert) + { + int plength; ++ PCRE2_SIZE endprevious; ++ ++ /* The use of \K may make the end offset earlier than the start. In ++ this situation, swap them round. */ ++ ++ if (offsets[0] > offsets[1]) ++ { ++ PCRE2_SIZE temp = offsets[0]; ++ offsets[0] = offsets[1]; ++ offsets[1] = temp; ++ } ++ + FWRITE_IGNORE(ptr, 1, offsets[0], stdout); + print_match(ptr + offsets[0], offsets[1] - offsets[0]); ++ + for (;;) + { +- startoffset = offsets[1]; /* Advance after previous match. */ ++ PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data); ++ ++ endprevious = offsets[1]; ++ startoffset = endprevious; /* Advance after previous match. 
*/ ++ ++ /* If the pattern contained a lookbehind that included \K, it is ++ possible that the end of the match might be at or before the actual ++ starting offset we have just used. In this case, start one character ++ further on. */ ++ ++ if (startoffset <= oldstartoffset) ++ { ++ startoffset = oldstartoffset + 1; ++ if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++; ++ } + + /* If the current match ended past the end of the line (only possible + in multiline mode), we must move on to the line in which it did end +@@ -2782,6 +2809,7 @@ while (ptr < endptr) + filepos += (int)(linelength + endlinelength); + linenumber++; + startoffset -= (int)(linelength + endlinelength); ++ endprevious -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; + length = (size_t)(endptr - ptr); +@@ -2797,7 +2825,18 @@ while (ptr < endptr) + loop for any that may follow. */ + + if (!match_patterns(ptr, length, options, startoffset, &mrc)) break; +- FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout); ++ ++ /* The use of \K may make the end offset earlier than the start. In ++ this situation, swap them round. */ ++ ++ if (offsets[0] > offsets[1]) ++ { ++ PCRE2_SIZE temp = offsets[0]; ++ offsets[0] = offsets[1]; ++ offsets[1] = temp; ++ } ++ ++ FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout); + print_match(ptr + offsets[0], offsets[1] - offsets[0]); + } + +@@ -2805,8 +2844,8 @@ while (ptr < endptr) + and its line-ending characters (if they matched the pattern), so there + may be no more to print. */ + +- plength = (int)((linelength + endlinelength) - startoffset); +- if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout); ++ plength = (int)((linelength + endlinelength) - endprevious); ++ if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout); + } + + /* Not colouring or multiline; no need to search for further matches. 
*/ +diff --git a/testdata/grepoutput b/testdata/grepoutput +index 7e963fb..e49c2b2 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -936,3 +936,12 @@ end + 16:start end in between start + end + RC=0 ++---------------------------- Test 125 ----------------------------- ++abcd ++RC=0 ++abcd ++RC=0 ++abcd ++RC=0 ++abcd ++RC=0 +-- +2.13.6 + diff --git a/pcre2-10.30-Update-pcre2demo-to-deal-with-various-K-inside-asser.patch b/pcre2-10.30-Update-pcre2demo-to-deal-with-various-K-inside-asser.patch new file mode 100644 index 0000000..4dcdec6 --- /dev/null +++ b/pcre2-10.30-Update-pcre2demo-to-deal-with-various-K-inside-asser.patch @@ -0,0 +1,96 @@ +From f442210323e228a407dfda75b6bb7a62e91111ee Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 23 Dec 2017 17:15:51 +0000 +Subject: [PATCH 1/3] Update pcre2demo to deal with various \K inside assertion + anomalies. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@894 6239d852-aaf2-0410-a92c-79f79f948069 + +Petr Písař: Ported to 10.30. +--- + src/pcre2demo.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ + +diff --git a/src/pcre2demo.c b/src/pcre2demo.c +index 8ae49f1..5d9b321 100644 +--- a/src/pcre2demo.c ++++ b/src/pcre2demo.c +@@ -211,6 +211,21 @@ pcre2_match_data_create_from_pattern() above. */ + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + ++/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion ++to set the start of a match later than its end. In this demonstration program, ++we just detect this case and give up. 
*/ ++ ++if (ovector[0] > ovector[1]) ++ { ++ printf("\\K was used in an assertion to set the match start after its end.\n" ++ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\n"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* Show substrings stored in the output vector by number. Obviously, in a real + application you might want to do things other than print them. */ + +@@ -338,6 +353,29 @@ for (;;) + options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + ++ /* If the previous match was not an empty string, there is one tricky case to ++ consider. If a pattern contains \K within a lookbehind assertion at the ++ start, the end of the matched string can be at the offset where the match ++ started. Without special action, this leads to a loop that keeps on matching ++ the same substring. We must detect this case and arrange to move the start on ++ by one character. The pcre2_get_startchar() function returns the starting ++ offset that was passed to pcre2_match(). */ ++ ++ else ++ { ++ PCRE2_SIZE startchar = pcre2_get_startchar(match_data); ++ if (start_offset <= startchar) ++ { ++ if (startchar >= subject_length) break; /* Reached end of subject. */ ++ start_offset = startchar + 1; /* Advance by one character. */ ++ if (utf8) /* If UTF-8, it may be more */ ++ { /* than one code unit. */ ++ for (; start_offset < subject_length; start_offset++) ++ if ((subject[start_offset] & 0xc0) != 0x80) break; ++ } ++ } ++ } ++ + /* Run the next matching operation */ + + rc = pcre2_match( +@@ -402,6 +440,21 @@ for (;;) + if (rc == 0) + printf("ovector was not big enough for all the captured substrings\n"); + ++ /* We must guard against patterns such as /(?=.\K)/ that use \K in an ++ assertion to set the start of a match later than its end. In this ++ demonstration program, we just detect this case and give up. 
*/ ++ ++ if (ovector[0] > ovector[1]) ++ { ++ printf("\\K was used in an assertion to set the match start after its end.\n" ++ "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]), ++ (char *)(subject + ovector[1])); ++ printf("Run abandoned\n"); ++ pcre2_match_data_free(match_data); ++ pcre2_code_free(re); ++ return 1; ++ } ++ + /* As before, show substrings stored in the output vector by number, and then + also any named substrings. */ + +-- +2.13.6 + diff --git a/pcre2.spec b/pcre2.spec index 426dc16..1800967 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -6,7 +6,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.30 -Release: %{?rcversion:0.}4%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}5%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -62,6 +62,15 @@ Patch6: pcre2-10.30-Change-pcre2grep-line-number-and-count-variables-to-.pat # Fix incorrect first matching character when a backreference with zero minimum # repeat starts a pattern, upstream bug #2209, in upstream after 10.30 Patch7: pcre2-10.30-Fix-incorrect-first-matching-character-when-a-backre.patch +# 1/2 Fix handling \K in an assertion in documentation, upstream bug #2211, +# in upstream after 10.30 +Patch8: pcre2-10.30-Update-pcre2demo-to-deal-with-various-K-inside-asser.patch +# 2/2 Fix handling \K in an assertion in documentation, upstream bug #2211, +# in upstream after 10.30 +Patch9: pcre2-10.30-Documentation-update.patch +# Fix handling \K in an assertion in pcre2grep tool, upstream bug #2211, +# in upstream after 10.30 +Patch10: pcre2-10.30-Fix-K-issues-in-pcre2grep.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -143,6 +152,9 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch5 -p1 %patch6 -p1 %patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -246,6 +258,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Fri Jan 12 2018 Petr Pisar - 10.30-5 +- Fix handling \K in an assertion in pcre2grep tool and documentation + (upstream bug #2211) + * Fri Dec 22 2017 Petr Pisar - 10.30-4 - Fix pcre2_jit_match() to properly check the pattern was JIT-compiled - Allow pcre2grep match counter to handle values larger than 2147483647,