diff --git a/.gitignore b/.gitignore index 4bc22f8..0a8f272 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,5 @@ /pcre2-10.34-RC2.tar.bz2.sig /pcre2-10.34.tar.bz2 /pcre2-10.34.tar.bz2.sig +/pcre2-10.35-RC1.tar.bz2 +/pcre2-10.35-RC1.tar.bz2.sig diff --git a/pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.patch b/pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.patch deleted file mode 100644 index 910f113..0000000 --- a/pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.patch +++ /dev/null @@ -1,598 +0,0 @@ -From b3f42a32920b20ae71988bc1d06a7148e0211925 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sat, 25 Jan 2020 15:50:44 +0000 -Subject: [PATCH] Ensure a newline after the final line in a file is output by - pcre2grep. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1211 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - RunGrepTest | 4 +- - doc/html/pcre2grep.html | 84 ++++++++++++++++++++------------- - doc/pcre2grep.1 | 83 ++++++++++++++++++++------------- - doc/pcre2grep.txt | 100 ++++++++++++++++++++++++---------------- - src/pcre2grep.c | 66 ++++++++++++++++++++++++-- - testdata/grepoutputN | 16 ++++--- - -diff --git a/RunGrepTest b/RunGrepTest -index 1113cd4..2ff4f7c 100755 ---- a/RunGrepTest -+++ b/RunGrepTest -@@ -742,11 +742,11 @@ uname=`uname` - case $uname in - Linux) - printf 'abc\0def' >testNinputgrep -- $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/' >>testtrygrep -+ $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | sed 's/\x00/ZERO/g' >>testtrygrep - echo "" >>testtrygrep - ;; - *) -- echo '1:abcZERO2:def' >>testtrygrep -+ echo '1:abcZERO2:defZERO' >>testtrygrep - ;; - esac - -diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html -index f5b72f3..abbafa1 100644 ---- a/doc/html/pcre2grep.html -+++ b/doc/html/pcre2grep.html -@@ -148,7 +148,7 @@ ignored. - By default, a file that contains a binary zero byte within the first 1024 bytes - is identified as a binary file, and is processed specially. (GNU grep - identifies binary files in this manner.) However, if the newline type is --specified as "nul", that is, the line terminator is a binary zero, the test for -+specified as NUL, that is, the line terminator is a binary zero, the test for - a binary file is not applied. See the --binary-files option for a means - of changing the way binary files are handled. -

-@@ -601,25 +601,32 @@ does not work when input is read line by line (see \fP--line-buffered\fP.) -

-

- -N newline-type, --newline=newline-type --The PCRE2 library supports five different conventions for indicating --the ends of lines. They are the single-character sequences CR (carriage return) --and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention, --which recognizes any of the preceding three types, and an "any" convention, in --which any Unicode line ending sequence is assumed to end a line. The Unicode --sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF --(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and --PS (paragraph separator, U+2029). -+Six different conventions for indicating the ends of lines in scanned files are -+supported. For example: -+

-+  pcre2grep -N CRLF 'some pattern' <file>
-+
-+The newline type may be specified in upper, lower, or mixed case. If the -+newline type is NUL, lines are separated by binary zero characters. The other -+types are the single-character sequences CR (carriage return) and LF -+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which -+recognizes any of the preceding three types, and an "any" type, for which any -+Unicode line ending sequence is assumed to end a line. The Unicode sequences -+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, -+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS -+(paragraph separator, U+2029). -
-
- When the PCRE2 library is built, a default line-ending sequence is specified. - This is normally the standard sequence for the operating system. Unless - otherwise specified by this option, pcre2grep uses the library's default. --The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This --makes it possible to use pcre2grep to scan files that have come from --other environments without having to modify their line endings. If the data --that is being scanned does not agree with the convention set by this option, --pcre2grep may behave in strange ways. Note that this option does not --apply to files specified by the -f, --exclude-from, or -+
-+
-+This option makes it possible to use pcre2grep to scan files that have -+come from other environments without having to modify their line endings. If -+the data that is being scanned does not agree with the convention set by this -+option, pcre2grep may behave in strange ways. Note that this option does -+not apply to files specified by the -f, --exclude-from, or - --include-from options, which are expected to use the operating system's - standard newline sequence. -

-@@ -640,12 +647,14 @@ use of JIT at run time. It is provided for testing and working round problems. - It should never be needed in normal use. -

-

---O text, --output=text -+-O text, --output=text - When there is a match, instead of outputting the whole line that matched, --output just the given text. This option is mutually exclusive with ----only-matching, --file-offsets, and --line-offsets. Escape --sequences starting with a dollar character may be used to insert the contents --of the matched part of the line and/or captured substrings into the text. -+output just the given text, followed by an operating-system standard newline. -+The --newline option has no effect on this option, which is mutually -+exclusive with --only-matching, --file-offsets, and -+--line-offsets. Escape sequences starting with a dollar character may be -+used to insert the contents of the matched part of the line and/or captured -+substrings into the text. -
-
- $<digits> or ${<digits>} is replaced by the captured -@@ -807,16 +816,27 @@ by the --locale option. If no locale is set, the PCRE2 library's default -
NEWLINES
-

- The -N (--newline) option allows pcre2grep to scan files with --different newline conventions from the default. Any parts of the input files --that are written to the standard output are copied identically, with whatever --newline sequences they have in the input. However, the setting of this option --affects only the way scanned files are processed. It does not affect the --interpretation of files specified by the -f, --file-list, ----exclude-from, or --include-from options, nor does it affect the --way in which pcre2grep writes informational messages to the standard --error and output streams. For these it uses the string "\n" to indicate --newlines, relying on the C I/O library to convert this to an appropriate --sequence. -+newline conventions that differ from the default. This option affects only the -+way scanned files are processed. It does not affect the interpretation of files -+specified by the -f, --file-list, --exclude-from, or -+--include-from options. -+

-+

-+Any parts of the scanned input files that are written to the standard output -+are copied with whatever newline sequences they have in the input. However, if -+the final line of a file is output, and it does not end with a newline -+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF -+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a -+single NL is used. -+

-+

-+The newline setting does not affect the way in which pcre2grep writes -+newlines in informational messages to the standard output and error streams. -+Under Windows, the standard output is set to be binary, so that "\r\n" at the -+ends of output lines that are copied from the input is not converted to -+"\r\r\n" by the C I/O library. This means that any messages written to the -+standard output must end with "\r\n". For all other operating systems, and -+for all messages to the standard error stream, "\n" is used. -

-
OPTIONS COMPATIBILITY
-

-@@ -992,9 +1012,9 @@ Cambridge, England. -

-
REVISION
-

--Last updated: 15 June 2019 -+Last updated: 25 January 2020 -
--Copyright © 1997-2019 University of Cambridge. -+Copyright © 1997-2020 University of Cambridge. -
-

- Return to the PCRE2 index page. -diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 -index 22992b1..82f0435 100644 ---- a/doc/pcre2grep.1 -+++ b/doc/pcre2grep.1 -@@ -1,4 +1,4 @@ --.TH PCRE2GREP 1 "15 June 2019" "PCRE2 10.34" -+.TH PCRE2GREP 1 "25 January 2020" "PCRE2 10.35" - .SH NAME - pcre2grep - a grep with Perl-compatible regular expressions. - .SH SYNOPSIS -@@ -117,7 +117,7 @@ ignored. - By default, a file that contains a binary zero byte within the first 1024 bytes - is identified as a binary file, and is processed specially. (GNU grep - identifies binary files in this manner.) However, if the newline type is --specified as "nul", that is, the line terminator is a binary zero, the test for -+specified as NUL, that is, the line terminator is a binary zero, the test for - a binary file is not applied. See the \fB--binary-files\fP option for a means - of changing the way binary files are handled. - . -@@ -523,24 +523,30 @@ large processing buffer, this should not be a problem, but the \fB-M\fP option - does not work when input is read line by line (see \fP--line-buffered\fP.) - .TP - \fB-N\fP \fInewline-type\fP, \fB--newline\fP=\fInewline-type\fP --The PCRE2 library supports five different conventions for indicating --the ends of lines. They are the single-character sequences CR (carriage return) --and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention, --which recognizes any of the preceding three types, and an "any" convention, in --which any Unicode line ending sequence is assumed to end a line. The Unicode --sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF --(form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and --PS (paragraph separator, U+2029). -+Six different conventions for indicating the ends of lines in scanned files are -+supported. For example: -+.sp -+ pcre2grep -N CRLF 'some pattern' -+.sp -+The newline type may be specified in upper, lower, or mixed case. If the -+newline type is NUL, lines are separated by binary zero characters. The other -+types are the single-character sequences CR (carriage return) and LF -+(linefeed), the two-character sequence CRLF, an "anycrlf" type, which -+recognizes any of the preceding three types, and an "any" type, for which any -+Unicode line ending sequence is assumed to end a line. The Unicode sequences -+are the three just mentioned, plus VT (vertical tab, U+000B), FF (form feed, -+U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS -+(paragraph separator, U+2029). - .sp - When the PCRE2 library is built, a default line-ending sequence is specified. - This is normally the standard sequence for the operating system. Unless - otherwise specified by this option, \fBpcre2grep\fP uses the library's default. --The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This --makes it possible to use \fBpcre2grep\fP to scan files that have come from --other environments without having to modify their line endings. If the data --that is being scanned does not agree with the convention set by this option, --\fBpcre2grep\fP may behave in strange ways. Note that this option does not --apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or -+.sp -+This option makes it possible to use \fBpcre2grep\fP to scan files that have -+come from other environments without having to modify their line endings. If -+the data that is being scanned does not agree with the convention set by this -+option, \fBpcre2grep\fP may behave in strange ways. Note that this option does -+not apply to files specified by the \fB-f\fP, \fB--exclude-from\fP, or - \fB--include-from\fP options, which are expected to use the operating system's - standard newline sequence. - .TP -@@ -558,12 +564,14 @@ was explicitly disabled at build time. This option can be used to disable the - use of JIT at run time. It is provided for testing and working round problems. - It should never be needed in normal use. - .TP --\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP -+\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP - When there is a match, instead of outputting the whole line that matched, --output just the given text. This option is mutually exclusive with --\fB--only-matching\fP, \fB--file-offsets\fP, and \fB--line-offsets\fP. Escape --sequences starting with a dollar character may be used to insert the contents --of the matched part of the line and/or captured substrings into the text. -+output just the given text, followed by an operating-system standard newline. -+The \fB--newline\fP option has no effect on this option, which is mutually -+exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and -+\fB--line-offsets\fP. Escape sequences starting with a dollar character may be -+used to insert the contents of the matched part of the line and/or captured -+substrings into the text. - .sp - $ or ${} is replaced by the captured - substring of the given decimal number; zero substitutes the whole match. If -@@ -709,16 +717,25 @@ by the \fB--locale\fP option. If no locale is set, the PCRE2 library's default - .rs - .sp - The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with --different newline conventions from the default. Any parts of the input files --that are written to the standard output are copied identically, with whatever --newline sequences they have in the input. However, the setting of this option --affects only the way scanned files are processed. It does not affect the --interpretation of files specified by the \fB-f\fP, \fB--file-list\fP, --\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the --way in which \fBpcre2grep\fP writes informational messages to the standard --error and output streams. For these it uses the string "\en" to indicate --newlines, relying on the C I/O library to convert this to an appropriate --sequence. -+newline conventions that differ from the default. This option affects only the -+way scanned files are processed. It does not affect the interpretation of files -+specified by the \fB-f\fP, \fB--file-list\fP, \fB--exclude-from\fP, or -+\fB--include-from\fP options. -+.P -+Any parts of the scanned input files that are written to the standard output -+are copied with whatever newline sequences they have in the input. However, if -+the final line of a file is output, and it does not end with a newline -+sequence, a newline sequence is added. If the newline setting is CR, LF, CRLF -+or NUL, that line ending is output; for the other settings (ANYCRLF or ANY) a -+single NL is used. -+.P -+The newline setting does not affect the way in which \fBpcre2grep\fP writes -+newlines in informational messages to the standard output and error streams. -+Under Windows, the standard output is set to be binary, so that "\er\en" at the -+ends of output lines that are copied from the input is not converted to -+"\er\er\en" by the C I/O library. This means that any messages written to the -+standard output must end with "\er\en". For all other operating systems, and -+for all messages to the standard error stream, "\en" is used. - . - . - .SH "OPTIONS COMPATIBILITY" -@@ -904,6 +921,6 @@ Cambridge, England. - .rs - .sp - .nf --Last updated: 15 June 2019 --Copyright (c) 1997-2019 University of Cambridge. -+Last updated: 25 January 2020 -+Copyright (c) 1997-2020 University of Cambridge. - .fi -diff --git a/doc/pcre2grep.txt b/doc/pcre2grep.txt -index b11092a..4d41f54 100644 ---- a/doc/pcre2grep.txt -+++ b/doc/pcre2grep.txt -@@ -116,9 +116,9 @@ BINARY FILES - By default, a file that contains a binary zero byte within the first - 1024 bytes is identified as a binary file, and is processed specially. - (GNU grep identifies binary files in this manner.) However, if the new- -- line type is specified as "nul", that is, the line terminator is a bi- -- nary zero, the test for a binary file is not applied. See the --binary- -- files option for a means of changing the way binary files are handled. -+ line type is specified as NUL, that is, the line terminator is a binary -+ zero, the test for a binary file is not applied. See the --binary-files -+ option for a means of changing the way binary files are handled. - - - BINARY ZEROS IN PATTERNS -@@ -578,30 +578,36 @@ OPTIONS - when input is read line by line (see --line-buffered.) - - -N newline-type, --newline=newline-type -- The PCRE2 library supports five different conventions for in- -- dicating the ends of lines. They are the single-character se- -- quences CR (carriage return) and LF (linefeed), the two-char- -- acter sequence CRLF, an "anycrlf" convention, which recog- -- nizes any of the preceding three types, and an "any" conven- -- tion, in which any Unicode line ending sequence is assumed to -- end a line. The Unicode sequences are the three just men- -- tioned, plus VT (vertical tab, U+000B), FF (form feed, -- U+000C), NEL (next line, U+0085), LS (line separator, -- U+2028), and PS (paragraph separator, U+2029). -+ Six different conventions for indicating the ends of lines in -+ scanned files are supported. For example: -+ -+ pcre2grep -N CRLF 'some pattern' -+ -+ The newline type may be specified in upper, lower, or mixed -+ case. If the newline type is NUL, lines are separated by bi- -+ nary zero characters. The other types are the single-charac- -+ ter sequences CR (carriage return) and LF (linefeed), the -+ two-character sequence CRLF, an "anycrlf" type, which recog- -+ nizes any of the preceding three types, and an "any" type, -+ for which any Unicode line ending sequence is assumed to end -+ a line. The Unicode sequences are the three just mentioned, -+ plus VT (vertical tab, U+000B), FF (form feed, U+000C), NEL -+ (next line, U+0085), LS (line separator, U+2028), and PS -+ (paragraph separator, U+2029). - - When the PCRE2 library is built, a default line-ending se- - quence is specified. This is normally the standard sequence - for the operating system. Unless otherwise specified by this -- option, pcre2grep uses the library's default. The possible -- values for this option are CR, LF, CRLF, ANYCRLF, or ANY. -- This makes it possible to use pcre2grep to scan files that -- have come from other environments without having to modify -- their line endings. If the data that is being scanned does -- not agree with the convention set by this option, pcre2grep -- may behave in strange ways. Note that this option does not -- apply to files specified by the -f, --exclude-from, or --in- -- clude-from options, which are expected to use the operating -- system's standard newline sequence. -+ option, pcre2grep uses the library's default. -+ -+ This option makes it possible to use pcre2grep to scan files -+ that have come from other environments without having to mod- -+ ify their line endings. If the data that is being scanned -+ does not agree with the convention set by this option, -+ pcre2grep may behave in strange ways. Note that this option -+ does not apply to files specified by the -f, --exclude-from, -+ or --include-from options, which are expected to use the op- -+ erating system's standard newline sequence. - - -n, --line-number - Precede each output line by its line number in the file, fol- -@@ -620,11 +626,13 @@ OPTIONS - - -O text, --output=text - When there is a match, instead of outputting the whole line -- that matched, output just the given text. This option is mu- -- tually exclusive with --only-matching, --file-offsets, and -- --line-offsets. Escape sequences starting with a dollar char- -- acter may be used to insert the contents of the matched part -- of the line and/or captured substrings into the text. -+ that matched, output just the given text, followed by an op- -+ erating-system standard newline. The --newline option has no -+ effect on this option, which is mutually exclusive with -+ --only-matching, --file-offsets, and --line-offsets. Escape -+ sequences starting with a dollar character may be used to in- -+ sert the contents of the matched part of the line and/or cap- -+ tured substrings into the text. - - $ or ${} is replaced by the captured sub- - string of the given decimal number; zero substitutes the -@@ -780,17 +788,27 @@ ENVIRONMENT VARIABLES - - NEWLINES - -- The -N (--newline) option allows pcre2grep to scan files with different -- newline conventions from the default. Any parts of the input files that -- are written to the standard output are copied identically, with what- -- ever newline sequences they have in the input. However, the setting of -- this option affects only the way scanned files are processed. It does -- not affect the interpretation of files specified by the -f, --file- -- list, --exclude-from, or --include-from options, nor does it affect the -- way in which pcre2grep writes informational messages to the standard -- error and output streams. For these it uses the string "\n" to indicate -- newlines, relying on the C I/O library to convert this to an appropri- -- ate sequence. -+ The -N (--newline) option allows pcre2grep to scan files with newline -+ conventions that differ from the default. This option affects only the -+ way scanned files are processed. It does not affect the interpretation -+ of files specified by the -f, --file-list, --exclude-from, or --in- -+ clude-from options. -+ -+ Any parts of the scanned input files that are written to the standard -+ output are copied with whatever newline sequences they have in the in- -+ put. However, if the final line of a file is output, and it does not -+ end with a newline sequence, a newline sequence is added. If the new- -+ line setting is CR, LF, CRLF or NUL, that line ending is output; for -+ the other settings (ANYCRLF or ANY) a single NL is used. -+ -+ The newline setting does not affect the way in which pcre2grep writes -+ newlines in informational messages to the standard output and error -+ streams. Under Windows, the standard output is set to be binary, so -+ that "\r\n" at the ends of output lines that are copied from the input -+ is not converted to "\r\r\n" by the C I/O library. This means that any -+ messages written to the standard output must end with "\r\n". For all -+ other operating systems, and for all messages to the standard error -+ stream, "\n" is used. - - - OPTIONS COMPATIBILITY -@@ -963,5 +981,5 @@ AUTHOR - - REVISION - -- Last updated: 15 June 2019 -- Copyright (c) 1997-2019 University of Cambridge. -+ Last updated: 25 January 2020 -+ Copyright (c) 1997-2020 University of Cambridge. -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 12fe95e..10314a5 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. - The header can be found in the special z/OS distribution, which is available - from www.zaconsultants.net or from www.cbttape.org. - -- Copyright (c) 1997-2019 University of Cambridge -+ Copyright (c) 1997-2020 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -1665,6 +1665,44 @@ switch(endlinetype) - - - -+/************************************************* -+* Output newline at end * -+*************************************************/ -+ -+/* This function is called if the final line of a file has been written to -+stdout, but it does not have a terminating newline. -+ -+Arguments: none -+Returns: nothing -+*/ -+ -+static void -+write_final_newline(void) -+{ -+switch(endlinetype) -+ { -+ default: /* Just in case */ -+ case PCRE2_NEWLINE_LF: -+ case PCRE2_NEWLINE_ANY: -+ case PCRE2_NEWLINE_ANYCRLF: -+ fprintf(stdout, "\n"); -+ break; -+ -+ case PCRE2_NEWLINE_CR: -+ fprintf(stdout, "\r"); -+ break; -+ -+ case PCRE2_NEWLINE_CRLF: -+ fprintf(stdout, "\r\n"); -+ break; -+ -+ case PCRE2_NEWLINE_NUL: -+ fprintf(stdout, "%c", 0); -+ break; -+ } -+} -+ -+ - /************************************************* - * Print the previous "after" lines * - *************************************************/ -@@ -1689,9 +1727,9 @@ do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart, - if (after_context > 0 && lastmatchnumber > 0) - { - int count = 0; -+ int ellength = 0; - while (lastmatchrestart < endptr && count < after_context) - { -- int ellength; - char *pp = end_of_line(lastmatchrestart, endptr, &ellength); - if (ellength == 0 && pp == main_buffer + bufsize) break; - if (printname != NULL) fprintf(stdout, "%s-", printname); -@@ -1700,7 +1738,17 @@ if (after_context > 0 && lastmatchnumber > 0) - lastmatchrestart = pp; - count++; - } -- if (count > 0) hyphenpending = TRUE; -+ -+ /* If we have printed any lines, arrange for a hyphen separator if anything -+ else follows. Also, if the last line is the final line in the file and it had -+ no newline, add one. */ -+ -+ if (count > 0) -+ { -+ hyphenpending = TRUE; -+ if (ellength == 0 && lastmatchrestart >= endptr) -+ write_final_newline(); -+ } - } - } - -@@ -2437,6 +2485,7 @@ char *endptr; - PCRE2_SIZE bufflength; - BOOL binary = FALSE; - BOOL endhyphenpending = FALSE; -+BOOL lines_printed = FALSE; - BOOL input_line_buffered = line_buffered; - FILE *in = NULL; /* Ensure initialized */ - -@@ -2777,6 +2826,8 @@ while (ptr < endptr) - - else - { -+ lines_printed = TRUE; -+ - /* See if there is a requirement to print some "after" lines from a - previous match. We never print any overlaps. */ - -@@ -2825,7 +2876,8 @@ while (ptr < endptr) - int linecount = 0; - char *p = ptr; - -- while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && -+ while (p > main_buffer && -+ (lastmatchnumber == 0 || p > lastmatchrestart) && - linecount < before_context) - { - linecount++; -@@ -2981,6 +3033,12 @@ while (ptr < endptr) - - lastmatchrestart = ptr + linelength + endlinelength; - lastmatchnumber = linenumber + 1; -+ -+ /* If a line was printed and we are now at the end of the file and the last -+ line had no newline, output one. */ -+ -+ if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0) -+ write_final_newline(); - } - - /* For a match in multiline inverted mode (which of course did not cause -diff --git a/testdata/grepoutputN b/testdata/grepoutputN -index ba97e90..caaeb75 100644 ---- a/testdata/grepoutputN -+++ b/testdata/grepoutputN -@@ -2,16 +2,20 @@ - 1:abc 2:def ---------------------------- Test N2 ------------------------------ - 1:abc def - 2:ghi --jkl---------------------------- Test N3 ------------------------------ -+jkl -+---------------------------- Test N3 ------------------------------ - 2:def 3: - ghi --jkl---------------------------- Test N4 ------------------------------ -+jkl ---------------------------- Test N4 ------------------------------ - 2:ghi --jkl---------------------------- Test N5 ------------------------------ -+jkl -+---------------------------- Test N5 ------------------------------ - 1:abc 2:def - 3:ghi --4:jkl---------------------------- Test N6 ------------------------------ -+4:jkl -+---------------------------- Test N6 ------------------------------ - 1:abc 2:def - 3:ghi --4:jkl---------------------------- Test N7 ------------------------------ --1:abcZERO2:def -+4:jkl -+---------------------------- Test N7 ------------------------------ -+1:abcZERO2:defZERO --- -2.21.1 - diff --git a/pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch b/pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch deleted file mode 100644 index c578bbd..0000000 --- a/pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 5e6a7641c60a1fcee8ae445be3511ce398c0baaa Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Sat, 11 Jan 2020 15:28:15 +0000 -Subject: [PATCH] Fix *THEN verbs in lookahead assertions in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1204 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_compile.c | 3 ++- - src/pcre2_jit_test.c | 1 + - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 78b94c1..00d13f1 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -9597,7 +9597,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) - } - else - { -- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); -+ SLJIT_ASSERT(extrasize == 3); -+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); - } - } -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index e0638ef..a188724 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -860,6 +860,7 @@ static struct regression_test_case regression_test_cases[] = { - { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" }, - { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" }, - { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" }, -+ { MU, A, 0, 0, "(?=(*THEN: ))* ", " " }, - - /* Recurse and control verbs. */ - { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" }, --- -2.21.1 - diff --git a/pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch b/pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch deleted file mode 100644 index 905900b..0000000 --- a/pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 5446ab8fa22b7e685c01cbfc5a673d2c7f994c93 Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Thu, 20 Feb 2020 07:42:47 +0000 -Subject: [PATCH] Fix a crash which occurs when the character type of an - invalid UTF character is decoded in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1221 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_compile.c | 6 ++++++ - src/pcre2_jit_test.c | 2 ++ - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 10665a8..ef29a76 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -7224,7 +7224,13 @@ cc = ccbegin; - if ((cc[-1] & XCL_NOT) != 0) - read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); - else -+ { -+#ifdef SUPPORT_UNICODE -+ read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0); -+#else /* !SUPPORT_UNICODE */ - read_char(common, min, max, NULL, 0); -+#endif /* SUPPORT_UNICODE */ -+ } - - if ((cc[-1] & XCL_HASPROP) == 0) - { -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index 187e565..619e738 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -1965,6 +1965,8 @@ static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cas - { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" }, - { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, - -+ { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" }, -+ - /* These two are not invalid UTF tests, but this infrastructure fits better for them. */ - { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" }, - { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" }, --- -2.21.1 - diff --git a/pcre2-10.34-Fix-bad-lookbehind-compilation-when-preceded-by-a-DE.patch b/pcre2-10.34-Fix-bad-lookbehind-compilation-when-preceded-by-a-DE.patch deleted file mode 100644 index 34b5781..0000000 --- a/pcre2-10.34-Fix-bad-lookbehind-compilation-when-preceded-by-a-DE.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 5bf35f661b7a73e892f6181f22988e5531960501 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 24 Feb 2020 17:29:00 +0000 -Subject: [PATCH] Fix bad lookbehind compilation when preceded by a DEFINE - group. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1228 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_compile.c | 4 ++++ - testdata/testinput1 | 3 +++ - testdata/testoutput1 | 4 ++++ - 3 files changed, 11 insertions(+) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 628503c..8083338 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -9588,6 +9588,10 @@ for (; *pptr != META_END; pptr++) - break; - - case META_COND_DEFINE: -+ pptr += SIZEOFFSET; -+ nestlevel++; -+ break; -+ - case META_COND_NAME: - case META_COND_NUMBER: - case META_COND_RNAME: -diff --git a/testdata/testinput1 b/testdata/testinput1 -index 959d4b8..6135681 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -6399,4 +6399,7 @@ ef) x/x,mark - "(?<=X(?(DEFINE)(Y))(?1))." - AXYZ - -+"(?(DEFINE)(?bar))(?bar))(? -Date: Mon, 24 Feb 2020 15:39:56 +0000 -Subject: [PATCH] Fix bug in UTF-16 checker returning wrong offset for missing - low surrogate. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1226 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_valid_utf.c | 4 ++-- - testdata/testinput12 | 6 ++++++ - testdata/testoutput12-16 | 11 ++++++++++- - testdata/testoutput12-32 | 9 +++++++++ - testdata/testoutput14-16 | 2 +- - -diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c -index 96e8bff..e47ea78 100644 ---- a/src/pcre2_valid_utf.c -+++ b/src/pcre2_valid_utf.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2017 University of Cambridge -+ New API code Copyright (c) 2016-2020 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -347,7 +347,7 @@ for (p = string; length > 0; p++) - length--; - if ((*p & 0xfc00) != 0xdc00) - { -- *erroroffset = p - string; -+ *erroroffset = p - string - 1; - return PCRE2_ERROR_UTF16_ERR2; - } - } -diff --git a/testdata/testinput12 b/testdata/testinput12 -index 32e97b5..beaf643 100644 ---- a/testdata/testinput12 -+++ b/testdata/testinput12 -@@ -444,6 +444,12 @@ - \= Expect no match - A\x{d800}B - A\x{110000}B -+ -+/aa/utf,ucp,match_invalid_utf,global -+ aa\x{d800}aa -+ -+/aa/utf,ucp,match_invalid_utf,global -+ \x{d800}aa - - # ---------------------------------------------------- - -diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index b944311..6e545c3 100644 ---- a/testdata/testoutput12-16 -+++ b/testdata/testoutput12-16 -@@ -533,7 +533,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 - XX\x{110000} - ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 - XX\x{d800}\x{1234} --Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 -+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 - \= Expect no match - XX\x{d800}\=offset=3 - No match -@@ -1576,6 +1576,15 @@ No match - No match - A\x{110000}B - ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 -+ -+/aa/utf,ucp,match_invalid_utf,global -+ aa\x{d800}aa -+ 0: aa -+ 0: aa -+ -+/aa/utf,ucp,match_invalid_utf,global -+ \x{d800}aa -+ 0: aa - - # ---------------------------------------------------- - -diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 74ccac8..1a0783a 100644 ---- a/testdata/testoutput12-32 -+++ b/testdata/testoutput12-32 -@@ -1574,6 +1574,15 @@ No match - No match - A\x{110000}B - No match -+ -+/aa/utf,ucp,match_invalid_utf,global -+ aa\x{d800}aa -+ 0: aa -+ 0: aa -+ -+/aa/utf,ucp,match_invalid_utf,global -+ \x{d800}aa -+ 0: aa - - # ---------------------------------------------------- - -diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 -index 2d58f1c..61541f6 100644 ---- a/testdata/testoutput14-16 -+++ b/testdata/testoutput14-16 -@@ -33,7 +33,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 - XX\x{110000} - ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 - XX\x{d800}\x{1234} --Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 -+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 - - /badutf/utf - X\xdf --- -2.21.1 - diff --git a/pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch b/pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch deleted file mode 100644 index 1dd9b7c..0000000 --- a/pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 6f516ffef41280fbd9fd451fc7eab0c9ce98efad Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sun, 26 Jan 2020 15:31:27 +0000 -Subject: [PATCH] Fix bug in processing (?(DEFINE)...) within lookbehind - assertions. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1212 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. - -Signed-off-by: Petr Písař ---- - src/pcre2_compile.c | 20 ++++++++++++++------ - testdata/testinput1 | 13 +++++++++++++ - testdata/testinput2 | 4 ++++ - testdata/testoutput1 | 17 +++++++++++++++++ - testdata/testoutput2 | 5 +++++ - 5 files changed, 53 insertions(+), 6 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index f2e6b6b..628503c 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -8836,9 +8836,10 @@ memset(slot + IMM2_SIZE + length, 0, - - /* This function is called to skip parts of the parsed pattern when finding the - length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find --the end of the branch, it is called to skip over an internal lookaround, and it --is also called to skip to the end of a class, during which it will never --encounter nested groups (but there's no need to have special code for that). -+the end of the branch, it is called to skip over an internal lookaround or -+(DEFINE) group, and it is also called to skip to the end of a class, during -+which it will never encounter nested groups (but there's no need to have -+special code for that). - - When called to find the end of a branch or group, pptr must point to the first - meta code inside the branch, not the branch-starting code. In other cases it -@@ -9316,14 +9317,21 @@ for (;; pptr++) - itemlength = grouplength; - break; - -- /* Check nested groups - advance past the initial data for each type and -- then seek a fixed length with get_grouplength(). */ -+ /* A (DEFINE) group is never obeyed inline and so it does not contribute to -+ the length of this branch. Skip from the following item to the next -+ unpaired ket. */ -+ -+ case META_COND_DEFINE: -+ pptr = parsed_skip(pptr + 1, PSKIP_KET); -+ break; -+ -+ /* Check other nested groups - advance past the initial data for each type -+ and then seek a fixed length with get_grouplength(). */ - - case META_COND_NAME: - case META_COND_NUMBER: - case META_COND_RNAME: - case META_COND_RNUMBER: -- case META_COND_DEFINE: - pptr += 2 + SIZEOFFSET; - goto CHECK_GROUP; - -diff --git a/testdata/testinput1 b/testdata/testinput1 -index f5159d6..959d4b8 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -6386,4 +6386,17 @@ ef) x/x,mark - /^(?a)(?()b)((?<=b).*)$/ - abc - -+"(?<=X(?(DEFINE)(A)))X(*F)" -+\= Expect no match -+ AXYZ -+ -+"(?<=X(?(DEFINE)(A)))." -+ AXYZ -+ -+"(?<=X(?(DEFINE)(.*))Y)." -+ AXYZ -+ -+"(?<=X(?(DEFINE)(Y))(?1))." -+ AXYZ -+ - # End of testinput1 -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 655e519..7f70860 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -5772,4 +5772,8 @@ a)"xI - /(a)?a/I - manm - -+# Expect non-fixed-length error -+ -+"(?<=X(?(DEFINE)(.*))(?1))." -+ - # End of testinput2 -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index ad2175b..dfb6366 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -10112,4 +10112,21 @@ No match - 1: a - 2: c - -+"(?<=X(?(DEFINE)(A)))X(*F)" -+\= Expect no match -+ AXYZ -+No match -+ -+"(?<=X(?(DEFINE)(A)))." -+ AXYZ -+ 0: Y -+ -+"(?<=X(?(DEFINE)(.*))Y)." -+ AXYZ -+ 0: Z -+ -+"(?<=X(?(DEFINE)(Y))(?1))." -+ AXYZ -+ 0: Z -+ - # End of testinput1 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index c733c12..69d1a7b 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -17435,6 +17435,11 @@ Subject length lower bound = 1 - manm - 0: a - -+# Expect non-fixed-length error -+ -+"(?<=X(?(DEFINE)(.*))(?1))." -+Failed: error 125 at offset 0: lookbehind assertion is not fixed length -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.21.1 - diff --git a/pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch b/pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch deleted file mode 100644 index 04ae437..0000000 --- a/pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch +++ /dev/null @@ -1,55 +0,0 @@ -From a6749bb6c7c6fbfe849fb7e4e8dcf9d0e767d3e4 Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Mon, 10 Feb 2020 10:18:01 +0000 -Subject: [PATCH] Fix control verb chain restoration issue in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1217 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_compile.c | 8 ++++---- - src/pcre2_jit_test.c | 1 + - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 7874fac..10665a8 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -2693,8 +2693,8 @@ while (cc < ccend) - } - if (common->control_head_ptr != 0 && !control_head_found) - { -- shared_srcw[0] = common->control_head_ptr; -- shared_count = 1; -+ private_srcw[0] = common->control_head_ptr; -+ private_count = 1; - control_head_found = TRUE; - } - cc += 1 + 2 + cc[1]; -@@ -2704,8 +2704,8 @@ while (cc < ccend) - SLJIT_ASSERT(common->control_head_ptr != 0); - if (!control_head_found) - { -- shared_srcw[0] = common->control_head_ptr; -- shared_count = 1; -+ private_srcw[0] = common->control_head_ptr; -+ private_count = 1; - control_head_found = TRUE; - } - cc++; -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index a188724..187e565 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -861,6 +861,7 @@ static struct regression_test_case regression_test_cases[] = { - { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" }, - { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" }, - { MU, A, 0, 0, "(?=(*THEN: ))* ", " " }, -+ { MU, A, 0, 0, "a(*THEN)(?R) |", "a" }, - - /* Recurse and control verbs. */ - { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" }, --- -2.21.1 - diff --git a/pcre2-10.34-Fix-the-too-early-access-of-the-fields-of-a-compiled.patch b/pcre2-10.34-Fix-the-too-early-access-of-the-fields-of-a-compiled.patch deleted file mode 100644 index ab2627a..0000000 --- a/pcre2-10.34-Fix-the-too-early-access-of-the-fields-of-a-compiled.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 75e399f77b5ffd82194b461e837a32cf48a5d970 Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Sat, 7 Dec 2019 16:00:53 +0000 -Subject: [PATCH] Fix the too early access of the fields of a compiled pattern - in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1192 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_compile.c | 10 +++++----- - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index 1d64536..303c68f 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -13742,11 +13742,6 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options) - { - pcre2_real_code *re = (pcre2_real_code *)code; - --#ifdef SUPPORT_JIT --executable_functions *functions = (executable_functions *)re->executable_jit; --static int executable_allocator_is_working = 0; --#endif -- - if (code == NULL) - return PCRE2_ERROR_NULL; - -@@ -13779,6 +13774,11 @@ actions are needed: - avoid compiler warnings. - */ - -+#ifdef SUPPORT_JIT -+executable_functions *functions = (executable_functions *)re->executable_jit; -+static int executable_allocator_is_working = 0; -+#endif -+ - if ((options & PCRE2_JIT_INVALID_UTF) != 0) - { - if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0) --- -2.21.0 - diff --git a/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch b/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch deleted file mode 100644 index 38513d4..0000000 --- a/pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch +++ /dev/null @@ -1,117 +0,0 @@ -From b251f0bc17a4d5a3b3f7690432113c773bcbe13f Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 27 Jan 2020 10:28:19 +0000 -Subject: [PATCH] Limit function recursion in pcre2_study to avoid stack - overflow issues. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1213 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Port to 10.34. ---- - src/pcre2_study.c | 31 ++++++++++++++++++++++--------- - -diff --git a/src/pcre2_study.c b/src/pcre2_study.c -index 2883868..5af01b5 100644 ---- a/src/pcre2_study.c -+++ b/src/pcre2_study.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2019 University of Cambridge -+ New API code Copyright (c) 2016-2020 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */ - - /* Returns from set_start_bits() */ - --enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; -+enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP }; - - - /************************************************* -@@ -924,19 +924,24 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as - must continue at the outer level to find at least one mandatory code unit. At - the outermost level, this function fails unless the result is SSB_DONE. - -+We restrict recursion (for nested groups) to 1000 to avoid stack overflow -+issues. -+ - Arguments: - re points to the compiled regex block - code points to an expression - utf TRUE if in UTF mode -+ depthptr pointer to recurse depth - - Returns: SSB_FAIL => Failed to find any starting code units - SSB_DONE => Found mandatory starting code units - SSB_CONTINUE => Found optional starting code units - SSB_UNKNOWN => Hit an unrecognized opcode -+ SSB_TOODEEP => Recursion is too deep - */ - - static int --set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf) -+set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, int *depthptr) - { - uint32_t c; - int yield = SSB_DONE; -@@ -947,6 +952,9 @@ int table_limit = utf? 16:32; - int table_limit = 32; - #endif - -+*depthptr += 1; -+if (*depthptr > 1000) return SSB_TOODEEP; -+ - do - { - BOOL try_next = TRUE; -@@ -1103,13 +1111,17 @@ do - case OP_SCRIPT_RUN: - case OP_ASSERT: - case OP_ASSERT_NA: -- rc = set_start_bits(re, tcode, utf); -- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; -- if (rc == SSB_DONE) try_next = FALSE; else -+ rc = set_start_bits(re, tcode, utf, depthptr); -+ if (rc == SSB_DONE) -+ { -+ try_next = FALSE; -+ } -+ else if (rc == SSB_CONTINUE) - { - do tcode += GET(tcode, 1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - } -+ else return rc; /* FAIL, UNKNOWN, or TOODEEP */ - break; - - /* If we hit ALT or KET, it means we haven't found anything mandatory in -@@ -1155,8 +1167,8 @@ do - case OP_BRAZERO: - case OP_BRAMINZERO: - case OP_BRAPOSZERO: -- rc = set_start_bits(re, ++tcode, utf); -- if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; -+ rc = set_start_bits(re, ++tcode, utf, depthptr); -+ if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc; - do tcode += GET(tcode,1); while (*tcode == OP_ALT); - tcode += 1 + LINK_SIZE; - break; -@@ -1664,7 +1676,8 @@ code units. */ - - if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) - { -- int rc = set_start_bits(re, code, utf); -+ int depth = 0; -+ int rc = set_start_bits(re, code, utf, &depth); - if (rc == SSB_UNKNOWN) return 1; - - /* If a list of starting code units was set up, scan the list to see if only --- -2.21.1 - diff --git a/pcre2-10.34-Remove-hackings-in-JIT.patch b/pcre2-10.34-Remove-hackings-in-JIT.patch deleted file mode 100644 index 3424e73..0000000 --- a/pcre2-10.34-Remove-hackings-in-JIT.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 5b90796ca14042e55b046d28c9eee45a5b03bbd4 Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Thu, 20 Feb 2020 08:57:39 +0000 -Subject: [PATCH] Remove hackings in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1222 6239d852-aaf2-0410-a92c-79f79f948069 -Signed-off-by: Petr Písař ---- - src/pcre2_jit_compile.c | 43 +++++++++++++---------------------------- - 1 file changed, 13 insertions(+), 30 deletions(-) - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index ef29a76..b42b335 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -7316,16 +7316,11 @@ if (needstype || needsscript) - /* Before anything else, we deal with scripts. */ - if (needsscript) - { --// PH hacking -- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); -- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); -- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -- -- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); -- -- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); -+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); -+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); -+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - -- // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); -+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - - ccbegin = cc; - -@@ -7369,28 +7364,19 @@ if (needstype || needsscript) - { - if (!needschar) - { --// PH hacking -- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); -- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); -- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -- OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0); -+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); -+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); -+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - -- OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); -- -- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); -- --// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); -+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - } - else - { --// PH hacking -- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); -- -+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); -+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - -- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); -- -+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - typereg = RETURN_ADDR; - } -@@ -8769,16 +8755,13 @@ if (common->utf && *cc == OP_REFI) - - CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); - --// PH hacking - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); - - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - -- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); -- -+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); -- -- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records)); - --- -2.21.1 - diff --git a/pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch b/pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch deleted file mode 100644 index d1b2fea..0000000 --- a/pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 73417f882ac907a182e1491ead2eecb7c5e559cc Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Fri, 24 Jan 2020 08:28:23 +0000 -Subject: [PATCH] The JIT stack should be freed when the low-level stack - allocation fails. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1207 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_misc.c | 5 +++++ - -diff --git a/src/pcre2_jit_misc.c b/src/pcre2_jit_misc.c -index efdb055..36abdba 100644 ---- a/src/pcre2_jit_misc.c -+++ b/src/pcre2_jit_misc.c -@@ -145,6 +145,11 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); - jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext); - if (jit_stack == NULL) return NULL; - jit_stack->stack = sljit_allocate_stack(startsize, maxsize, &jit_stack->memctl); -+if (jit_stack->stack == NULL) -+ { -+ jit_stack->memctl.free(jit_stack, jit_stack->memctl.memory_data); -+ return NULL; -+ } - return jit_stack; - - #endif --- -2.21.1 - diff --git a/pcre2-10.34-Use-PCRE2_MATCH_EMPTY-flag-to-detect-empty-matches-i.patch b/pcre2-10.34-Use-PCRE2_MATCH_EMPTY-flag-to-detect-empty-matches-i.patch deleted file mode 100644 index b103633..0000000 --- a/pcre2-10.34-Use-PCRE2_MATCH_EMPTY-flag-to-detect-empty-matches-i.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 037a7a81a46898c61e780cd23feddbae73b87839 Mon Sep 17 00:00:00 2001 -From: zherczeg -Date: Thu, 28 Nov 2019 11:35:08 +0000 -Subject: [PATCH] Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1190 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.34. ---- - src/pcre2_jit_compile.c | 4 ++-- - src/pcre2_jit_test.c | 1 + - -diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c -index f564127..1d64536 100644 ---- a/src/pcre2_jit_compile.c -+++ b/src/pcre2_jit_compile.c -@@ -13122,8 +13122,8 @@ common->read_only_data_head = NULL; - common->fcc = tables + fcc_offset; - common->lcc = (sljit_sw)(tables + lcc_offset); - common->mode = mode; --common->might_be_empty = re->minlength == 0; --common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0; -+common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY); -+common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY); - common->nltype = NLTYPE_FIXED; - switch(re->newline_convention) - { -diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c -index a9b3880..e0638ef 100644 ---- a/src/pcre2_jit_test.c -+++ b/src/pcre2_jit_test.c -@@ -638,6 +638,7 @@ static struct regression_test_case regression_test_cases[] = { - { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" }, - { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, - { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, -+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" }, - - /* Conditional blocks. */ - { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, --- -2.21.0 - diff --git a/pcre2.spec b/pcre2.spec index b2b9e17..edf5627 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -6,10 +6,10 @@ %bcond_with pcre2_enables_sealloc # This is stable release: -#%%global rcversion RC1 +%global rcversion RC1 Name: pcre2 -Version: 10.34 -Release: %{?rcversion:0.}9%{?rcversion:.%rcversion}%{?dist} +Version: 10.35 +Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -51,42 +51,6 @@ Source1: https://ftp.pcre.org/pub/pcre/%{?rcversion:Testing/}%{name}-%{myvers Source2: https://ftp.pcre.org/pub/pcre/Public-Key # Do no set RPATH if libdir is not /usr/lib Patch0: pcre2-10.10-Fix-multilib.patch -# Fix JIT to respect NOTEMPTY options, upstream bug #2473, -# in upstream after 10.34 -Patch1: pcre2-10.34-Use-PCRE2_MATCH_EMPTY-flag-to-detect-empty-matches-i.patch -# Fix a crash in pcre2_jit_compile when passing a NULL code argument, -# upstream bug #2487, in upstream after 10.34 -Patch2: pcre2-10.34-Fix-the-too-early-access-of-the-fields-of-a-compiled.patch -# Fix a crash in JITted code when a *THEN verb is used in a lookahead assertion, -# upstream bug #2510, in upstream after 10.34 -Patch3: pcre2-10.34-Fix-THEN-verbs-in-lookahead-assertions-in-JIT.patch -# Fix a memory leak when allocating a JIT stack fails, in upstream after 10.34 -Patch4: pcre2-10.34-The-JIT-stack-should-be-freed-when-the-low-level-sta.patch -# Ensure a newline after the final line in a file is output by pcre2grep, -# upstream bug #2513, in upstream after 10.34 -Patch5: pcre2-10.34-Ensure-a-newline-after-the-final-line-in-a-file-is-o.patch -# Fix processing (?(DEFINE)...) within look-behind assertions, -# in upstream after 10.34 -Patch6: pcre2-10.34-Fix-bug-in-processing-DEFINE-.-within-lookbehind-ass.patch -# Prevent from a stack exhaustion when studying a pattern for nested groups by -# putting a limit of 1000 recursive calls, in upstream after 10.34 -Patch7: pcre2-10.34-Limit-function-recursion-in-pcre2_study-to-avoid-sta.patch -# Fix restoring a verb chain list when exiting a JIT-compiled recursive -# function, in upstream after 10.34 -Patch8: pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch -# Fix a crash in JIT when an invalid UTF-8 character is encountered in -# match_invalid_utf mode, upstream bug #2529, in upstream after 10.34 -Patch9: pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch -# Fix computing an offest for the start of the UTF-16 error when a high surrogate -# is not followed by a valid low surrogate, upstream bug #2527, -# in upstream after 10.34 -Patch10: pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch -# Fix compiling a lookbehind when preceded by a DEFINE group, -# upstream bug #2531, in upstream after 10.34 -Patch11: pcre2-10.34-Fix-bad-lookbehind-compilation-when-preceded-by-a-DE.patch -# Fix a JIT compilation of the Unicode scripts in the extended character classes, -# upstream bug #2432, in upstream after 10.34 -Patch12: pcre2-10.34-Remove-hackings-in-JIT.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -97,6 +61,7 @@ BuildRequires: make %if %{with pcre2_enables_readline} BuildRequires: readline-devel %endif +BuildRequires: sed Requires: %{name}-syntax = %{version}-%{release} Provides: bundled(sljit) @@ -177,23 +142,15 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}' %setup -q -n %{name}-%{myversion} %patch0 -p1 -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch8 -p1 -%patch9 -p1 -%patch10 -p1 -%patch11 -p1 -%patch12 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif %build +# Shadow stack built-in functions are required for -fcf-protection. +# Checked in src/sljit/sljitConfigInternal.h, _get_ssp() is used. +%global optflags %(printf -- '%s' '%{optflags}' | \ + sed -E 's/(^|\\s)(-fcf-protection)($|\\s)/\\1\\2\\3 -mshstk /') # There is a strict-aliasing problem on PPC64, bug #881232 %ifarch ppc64 %global optflags %{optflags} -fno-strict-aliasing @@ -302,6 +259,9 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Thu Apr 16 2020 Petr Pisar - 10.35-0.1.RC1 +- 10.35-RC1 bump + * Mon Mar 23 2020 Petr Pisar - 10.34-9 - Fix a JIT compilation of the Unicode scripts in the extended character classes (upstream bug #2432) diff --git a/sources b/sources index cdee74b..c763517 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (pcre2-10.34.tar.bz2) = 77ad75f8b0b8bbfc2f57932596151bca25b06bd621e0f047e476f38cd127f43e2052460b95c281a7e874aad2b7fd86c8f3413f4a323abb74b9440a42d0ee9524 -SHA512 (pcre2-10.34.tar.bz2.sig) = f4cb8dcbe4ef254a47ccd76e3a62097fb6ee2b6278053d08fb87d4e2e21f788cc84bc54587e41d236b73fb1403816ba1576ec10545d2abdffac5a523d97fd71c +SHA512 (pcre2-10.35-RC1.tar.bz2) = f634d9d68b86672c37e2d66e54507e0cfd5bbfb875c2b5e70fdf0841d41e6aa01091b399d9d0a8318d44eee9894e1dc7ea855ad20db2047f803da017aecf8f02 +SHA512 (pcre2-10.35-RC1.tar.bz2.sig) = 827a6d3127c13427ca2f7ca35a60fdca5bcfc43b5b2c588b8abb8338ec1307b49170a5b91daddf4f9069a584dbb8ebb321d66be959ec8990ff531c20d678ec55