diff --git a/pcre2-10.33-Fix-pcre2grep-o-bug-when-ovector-overflows-add-optio.patch b/pcre2-10.33-Fix-pcre2grep-o-bug-when-ovector-overflows-add-optio.patch new file mode 100644 index 0000000..d1b0d7b --- /dev/null +++ b/pcre2-10.33-Fix-pcre2grep-o-bug-when-ovector-overflows-add-optio.patch @@ -0,0 +1,382 @@ +From e29388de53ea3a4f9d1c6b4932613681493ac9dc Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 15 Jun 2019 15:51:07 +0000 +Subject: [PATCH] Fix pcre2grep -o bug when ovector overflows; add option to + adjust the limit; raise the default limit; give error if -o requests an + uncaptured parens. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1106 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Ported to 10.33. + +Signed-off-by: Petr Písař +--- + RunGrepTest | 7 ++++++ + doc/html/pcre2api.html | 12 +++++----- + doc/html/pcre2grep.html | 28 +++++++++++++++------- + doc/html/pcre2test.html | 4 +++- + doc/pcre2grep.1 | 26 +++++++++++++------- + doc/pcre2grep.txt | 43 ++++++++++++++++++++------------- + doc/pcre2test.txt | 4 +++- + src/pcre2grep.c | 53 ++++++++++++++++++++++++++++------------- + testdata/grepoutput | 7 ++++++ + 9 files changed, 126 insertions(+), 58 deletions(-) + +diff --git a/RunGrepTest b/RunGrepTest +index bac1f1b..ea37f70 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -653,6 +653,13 @@ printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep + $valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep + echo "RC=$?" >>testtrygrep + ++echo "---------------------------- Test 127 -----------------------------" >>testtrygrep ++(cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ ++echo "---------------------------- Test 128 -----------------------------" >>testtrygrep ++(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1 ++echo "RC=$?" >>testtrygrep + + # Now compare the results. + +diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html +index 7ca39f5..84f4442 100644 +--- a/doc/html/pcre2api.html ++++ b/doc/html/pcre2api.html +@@ -2252,12 +2252,12 @@ segment. + PCRE2_INFO_MINLENGTH + + If a minimum length for matching subject strings was computed, its value is +-returned. Otherwise the returned value is 0. The value is a number of +-characters, which in UTF mode may be different from the number of code units. +-The third argument should point to an uint32_t variable. The value is a +-lower bound to the length of any matching string. There may not be any strings +-of that length that do actually match, but every string that does match is at +-least that long. ++returned. Otherwise the returned value is 0. This value is not computed when ++PCRE2_NO_START_OPTIMIZE is set. The value is a number of characters, which in ++UTF mode may be different from the number of code units. The third argument ++should point to an uint32_t variable. The value is a lower bound to the ++length of any matching string. There may not be any strings of that length that ++do actually match, but every string that does match is at least that long. +
+   PCRE2_INFO_NAMECOUNT
+   PCRE2_INFO_NAMEENTRYSIZE
+diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html
+index d66cee3..de699e7 100644
+--- a/doc/html/pcre2grep.html
++++ b/doc/html/pcre2grep.html
+@@ -685,20 +685,32 @@ otherwise empty line. This option is mutually exclusive with --output,
+ 

+ -onumber, --only-matching=number + Show only the part of the line that matched the capturing parentheses of the +-given number. Up to 32 capturing parentheses are supported, and -o0 is +-equivalent to -o without a number. Because these options can be given +-without an argument (see above), if an argument is present, it must be given in +-the same shell item, for example, -o3 or --only-matching=2. The comments given +-for the non-argument case above also apply to this option. If the specified +-capturing parentheses do not exist in the pattern, or were not set in the +-match, nothing is output unless the file name or line number are being output. ++given number. Up to 50 capturing parentheses are supported by default. This ++limit can be changed via the --om-capture option. A pattern may contain ++any number of capturing parentheses, but only those whose number is within the ++limit can be accessed by -o. An error occurs if the number specified by ++-o is greater than the limit. ++
++
++-o0 is the same as -o without a number. Because these options can be ++given without an argument (see above), if an argument is present, it must be ++given in the same shell item, for example, -o3 or --only-matching=2. The ++comments given for the non-argument case above also apply to this option. If ++the specified capturing parentheses do not exist in the pattern, or were not ++set in the match, nothing is output unless the file name or line number are ++being output. +
+
+ If this option is given multiple times, multiple substrings are output for each + match, in the order the options are given, and all on one line. For example, + -o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and + then 3 again to be output. By default, there is no separator (but see the next +-option). ++but one option). ++

++

++--om-capture=number ++Set the number of capturing parentheses that can be accessed by -o. The ++default is 50. +

+

+ --om-separator=text +diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html +index 083d5cc..4be47c6 100644 +--- a/doc/html/pcre2test.html ++++ b/doc/html/pcre2test.html +@@ -738,7 +738,9 @@ options, the line is omitted. "First code unit" is where any match must start; + if there is more than one they are listed as "starting code units". "Last code + unit" is the last literal code unit that must be present in any match. This is + not necessarily the last character. These lines are omitted if no starting or +-ending code units are recorded. ++ending code units are recorded. The subject length line is omitted when ++no_start_optimize is set because the minimum length is not calculated ++when it can never be used. +

+

+ The framesize modifier shows the size, in bytes, of the storage frames +diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 +index 6b3219b..1dcdb68 100644 +--- a/doc/pcre2grep.1 ++++ b/doc/pcre2grep.1 +@@ -596,19 +596,29 @@ otherwise empty line. This option is mutually exclusive with \fB--output\fP, + .TP + \fB-o\fP\fInumber\fP, \fB--only-matching\fP=\fInumber\fP + Show only the part of the line that matched the capturing parentheses of the +-given number. Up to 32 capturing parentheses are supported, and -o0 is +-equivalent to \fB-o\fP without a number. Because these options can be given +-without an argument (see above), if an argument is present, it must be given in +-the same shell item, for example, -o3 or --only-matching=2. The comments given +-for the non-argument case above also apply to this option. If the specified +-capturing parentheses do not exist in the pattern, or were not set in the +-match, nothing is output unless the file name or line number are being output. ++given number. Up to 50 capturing parentheses are supported by default. This ++limit can be changed via the \fB--om-capture\fP option. A pattern may contain ++any number of capturing parentheses, but only those whose number is within the ++limit can be accessed by \fB-o\fP. An error occurs if the number specified by ++\fB-o\fP is greater than the limit. ++.sp ++-o0 is the same as \fB-o\fP without a number. Because these options can be ++given without an argument (see above), if an argument is present, it must be ++given in the same shell item, for example, -o3 or --only-matching=2. The ++comments given for the non-argument case above also apply to this option. If ++the specified capturing parentheses do not exist in the pattern, or were not ++set in the match, nothing is output unless the file name or line number are ++being output. + .sp + If this option is given multiple times, multiple substrings are output for each + match, in the order the options are given, and all on one line. For example, + -o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and + then 3 again to be output. By default, there is no separator (but see the next +-option). ++but one option). ++.TP ++\fB--om-capture\fP=\fInumber\fP ++Set the number of capturing parentheses that can be accessed by \fB-o\fP. The ++default is 50. + .TP + \fB--om-separator\fP=\fItext\fP + Specify a separating string for multiple occurrences of \fB-o\fP. The default +diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt +index cd44fe0..2920643 100644 +--- a/doc/pcre2grep.txt ++++ b/doc/pcre2grep.txt +@@ -662,23 +662,32 @@ OPTIONS + + -onumber, --only-matching=number + Show only the part of the line that matched the capturing +- parentheses of the given number. Up to 32 capturing parenthe- +- ses are supported, and -o0 is equivalent to -o without a num- +- ber. Because these options can be given without an argument +- (see above), if an argument is present, it must be given in +- the same shell item, for example, -o3 or --only-matching=2. +- The comments given for the non-argument case above also apply +- to this option. If the specified capturing parentheses do not +- exist in the pattern, or were not set in the match, nothing +- is output unless the file name or line number are being out- +- put. +- +- If this option is given multiple times, multiple substrings +- are output for each match, in the order the options are +- given, and all on one line. For example, -o3 -o1 -o3 causes +- the substrings matched by capturing parentheses 3 and 1 and +- then 3 again to be output. By default, there is no separator +- (but see the next option). ++ parentheses of the given number. Up to 50 capturing parenthe- ++ ses are supported by default. This limit can be changed via ++ the --om-capture option. A pattern may contain any number of ++ capturing parentheses, but only those whose number is within ++ the limit can be accessed by -o. An error occurs if the num- ++ ber specified by -o is greater than the limit. ++ ++ -o0 is the same as -o without a number. Because these options ++ can be given without an argument (see above), if an argument ++ is present, it must be given in the same shell item, for ++ example, -o3 or --only-matching=2. The comments given for the ++ non-argument case above also apply to this option. If the ++ specified capturing parentheses do not exist in the pattern, ++ or were not set in the match, nothing is output unless the ++ file name or line number are being output. ++ ++ If this option is given multiple times, multiple substrings ++ are output for each match, in the order the options are ++ given, and all on one line. For example, -o3 -o1 -o3 causes ++ the substrings matched by capturing parentheses 3 and 1 and ++ then 3 again to be output. By default, there is no separator ++ (but see the next but one option). ++ ++ --om-capture=number ++ Set the number of capturing parentheses that can be accessed ++ by -o. The default is 50. + + --om-separator=text + Specify a separating string for multiple occurrences of -o. +diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt +index cbe3528..f287f6d 100644 +--- a/doc/pcre2test.txt ++++ b/doc/pcre2test.txt +@@ -669,7 +669,9 @@ PATTERN MODIFIERS + as "starting code units". "Last code unit" is the last literal code + unit that must be present in any match. This is not necessarily the + last character. These lines are omitted if no starting or ending code +- units are recorded. ++ units are recorded. The subject length line is omitted when ++ no_start_optimize is set because the minimum length is not calculated ++ when it can never be used. + + The framesize modifier shows the size, in bytes, of the storage frames + used by pcre2_match() for handling backtracking. The size depends on +diff --git a/src/pcre2grep.c b/src/pcre2grep.c +index a3cc3ec..d17cd2a 100644 +--- a/src/pcre2grep.c ++++ b/src/pcre2grep.c +@@ -115,7 +115,7 @@ MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */ + + typedef int BOOL; + +-#define OFFSET_SIZE 33 ++#define DEFAULT_CAPTURE_MAX 50 + + #if BUFSIZ > 8192 + #define MAXPATLEN BUFSIZ +@@ -242,6 +242,8 @@ static pcre2_compile_context *compile_context; + static pcre2_match_context *match_context; + static pcre2_match_data *match_data; + static PCRE2_SIZE *offsets; ++static uint32_t offset_size; ++static uint32_t capture_max = DEFAULT_CAPTURE_MAX; + + static BOOL count_only = FALSE; + static BOOL do_colour = FALSE; +@@ -391,6 +393,7 @@ used to identify them. */ + #define N_INCLUDE_FROM (-21) + #define N_OM_SEPARATOR (-22) + #define N_MAX_BUFSIZE (-23) ++#define N_OM_CAPTURE (-24) + + static option_item optionlist[] = { + { OP_NODATA, N_NULL, NULL, "", "terminate options" }, +@@ -437,6 +440,7 @@ static option_item optionlist[] = { + { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" }, + { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, + { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, ++ { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" }, + { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, + { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, + { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" }, +@@ -2568,7 +2572,7 @@ while (ptr < endptr) + + for (i = 0; i < jfriedl_XR; i++) + match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0, +- PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); ++ PCRE2_NOTEMPTY, offsets, offset_size) >= 0); + + if (gettimeofday(&end_time, &dummy) != 0) + perror("bad gettimeofday"); +@@ -2688,7 +2692,7 @@ while (ptr < endptr) + for (om = only_matching; om != NULL; om = om->next) + { + int n = om->groupnum; +- if (n < mrc) ++ if (n == 0 || n < mrc) + { + int plen = offsets[2*n + 1] - offsets[2*n]; + if (plen > 0) +@@ -3639,6 +3643,7 @@ int rc = 1; + BOOL only_one_at_top; + patstr *cp; + fnstr *fn; ++omstr *om; + const char *locale_from = "--locale"; + + #ifdef SUPPORT_PCRE2GREP_JIT +@@ -3655,20 +3660,6 @@ must use STDOUT_NL to terminate lines. */ + _setmode(_fileno(stdout), _O_BINARY); + #endif + +-/* Set up a default compile and match contexts and a match data block. */ +- +-compile_context = pcre2_compile_context_create(NULL); +-match_context = pcre2_match_context_create(NULL); +-match_data = pcre2_match_data_create(OFFSET_SIZE, NULL); +-offsets = pcre2_get_ovector_pointer(match_data); +- +-/* If string (script) callouts are supported, set up the callout processing +-function. */ +- +-#ifdef SUPPORT_PCRE2GREP_CALLOUT +-pcre2_set_callout(match_context, pcre2grep_callout, NULL); +-#endif +- + /* Process the options */ + + for (i = 1; i < argc; i++) +@@ -4015,12 +4006,40 @@ if (only_matching_count > 1) + pcre2grep_exit(usage(2)); + } + ++/* Check that there is a big enough ovector for all -o settings. */ ++ ++for (om = only_matching; om != NULL; om = om->next) ++ { ++ int n = om->groupnum; ++ if (n > (int)capture_max) ++ { ++ fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n); ++ fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n"); ++ goto EXIT2; ++ } ++ } ++ + /* Check the text supplied to --output for errors. */ + + if (output_text != NULL && + !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE)) + goto EXIT2; + ++/* Set up default compile and match contexts and a match data block. */ ++ ++offset_size = capture_max + 1; ++compile_context = pcre2_compile_context_create(NULL); ++match_context = pcre2_match_context_create(NULL); ++match_data = pcre2_match_data_create(offset_size, NULL); ++offsets = pcre2_get_ovector_pointer(match_data); ++ ++/* If string (script) callouts are supported, set up the callout processing ++function. */ ++ ++#ifdef SUPPORT_PCRE2GREP_CALLOUT ++pcre2_set_callout(match_context, pcre2grep_callout, NULL); ++#endif ++ + /* Put limits into the match data block. */ + + if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit); +diff --git a/testdata/grepoutput b/testdata/grepoutput +index 2bd69be..a9297e1 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -949,3 +949,10 @@ RC=0 + ---------------------------- Test 126 ----------------------------- + ABCXYZ + RC=0 ++---------------------------- Test 127 ----------------------------- ++pattern ++RC=0 ++---------------------------- Test 128 ----------------------------- ++pcre2grep: Requested group 1 cannot be captured. ++pcre2grep: Use --om-capture to increase the size of the capture vector. ++RC=2 +-- +2.20.1 + diff --git a/pcre2.spec b/pcre2.spec index d754aec..8d65038 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -62,6 +62,9 @@ Patch4: pcre2-10.33-Forgot-this-file-in-previous-commit.-Fixes-JIT-non-U.pat # Fix a non-JIT match to return (*MARK) names from a successful conditional # assertion, in upstream after 10.33 Patch5: pcre2-10.33-Make-pcre2_match-return-MARK-names-from-successful-c.patch +# Fix pcre2grep --only-matching output when number of capturing groups exceeds +# 32, upstream bug #2407, in upstream after 10.33 +Patch6: pcre2-10.33-Fix-pcre2grep-o-bug-when-ovector-overflows-add-optio.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -143,6 +146,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch3 -p1 %patch4 -p1 %patch5 -p1 +%patch6 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -244,6 +248,8 @@ make %{?_smp_mflags} check VERBOSE=yes * Mon Jun 17 2019 Petr Pisar - 10.33-4 - Fix a non-JIT match to return (*MARK) names from a successful conditional assertion +- Fix pcre2grep --only-matching output when number of capturing groups exceeds + 32 (upstream bug #2407) * Mon May 13 2019 Petr Pisar - 10.33-3 - Correct a misspelling in a documentation