diff --git a/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch b/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch new file mode 100644 index 0000000..bae2131 --- /dev/null +++ b/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch @@ -0,0 +1,417 @@ +From d59c555dcc96b23d0481f901ba617db91b9b2a9a Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 24 Feb 2018 17:09:19 +0000 +Subject: [PATCH] Add support to pcre2grep for binary zeros in -f files. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@920 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Ported to 10.31. +--- + RunGrepTest | 6 ++++ + doc/pcre2grep.1 | 59 +++++++++++++++++++++++--------------- + src/pcre2grep.c | 81 +++++++++++++++++++++++++++++------------------------ + testdata/grepoutput | 3 ++ + +diff --git a/RunGrepTest b/RunGrepTest +index a26f677..293e5a5 100755 +--- a/RunGrepTest ++++ b/RunGrepTest +@@ -641,6 +641,12 @@ echo "RC=$?" >>testtrygrep + $valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep + echo "RC=$?" >>testtrygrep + ++echo "---------------------------- Test 126 -----------------------------" >>testtrygrep ++printf "Next line pattern has binary zero\nABC\x00XYZ\n" >testtemp1grep ++printf "ABC\x00XYZ\nABCDEF\nDEFABC\n" >testtemp2grep ++$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep ++echo "RC=$?" >>testtrygrep ++ + + # Now compare the results. + +diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 +index 5e5cbea..ba6aea6 100644 +--- a/doc/pcre2grep.1 ++++ b/doc/pcre2grep.1 +@@ -1,4 +1,4 @@ +-.TH PCRE2GREP 1 "13 November 2017" "PCRE2 10.31" ++.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32" + .SH NAME + pcre2grep - a grep with Perl-compatible regular expressions. + .SH SYNOPSIS +@@ -121,6 +121,14 @@ a binary file is not applied. See the \fB--binary-files\fP option for a means + of changing the way binary files are handled. + . + . ++.SH "BINARY ZEROS IN PATTERNS" ++.rs ++.sp ++Patterns passed from the command line are strings that are terminated by a ++binary zero, so cannot contain internal zeros. However, patterns that are read ++from a file via the \fB-f\fP option may contain binary zeros. ++. ++. + .SH OPTIONS + .rs + .sp +@@ -304,12 +312,15 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or + .TP + \fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP + Read patterns from the file, one per line, and match them against each line of +-input. What constitutes a newline when reading the file is the operating +-system's default. The \fB--newline\fP option has no effect on this option. +-Trailing white space is removed from each line, and blank lines are ignored. An +-empty file contains no patterns and therefore matches nothing. See also the +-comments about multiple patterns versus a single pattern with alternatives in +-the description of \fB-e\fP above. ++input. As is the case with patterns on the command line, no delimiters should ++be used. What constitutes a newline when reading the file is the operating ++system's default interpretation of \en. The \fB--newline\fP option has no ++effect on this option. Trailing white space is removed from each line, and ++blank lines are ignored. An empty file contains no patterns and therefore ++matches nothing. Patterns read from a file in this way may contain binary ++zeros, which are treated as ordinary data characters. See also the comments ++about multiple patterns versus a single pattern with alternatives in the ++description of \fB-e\fP above. + .sp + If this option is given more than once, all the specified files are read. A + data line is output if any of the patterns match it. A file name can be given +@@ -320,14 +331,15 @@ command line; all arguments are treated as the names of paths to be searched. + .TP + \fB--file-list\fP=\fIfilename\fP + Read a list of files and/or directories that are to be scanned from the given +-file, one per line. Trailing white space is removed from each line, and blank +-lines are ignored. These paths are processed before any that are listed on the +-command line. The file name can be given as "-" to refer to the standard input. +-If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are +-read first. This is useful only when the standard input is a terminal, from +-which further lines (the list of files) can be read after an end-of-file +-indication. If this option is given more than once, all the specified files are +-read. ++file, one per line. What constitutes a newline when reading the file is the ++operating system's default. Trailing white space is removed from each line, and ++blank lines are ignored. These paths are processed before any that are listed ++on the command line. The file name can be given as "-" to refer to the standard ++input. If \fB--file\fP and \fB--file-list\fP are both specified as "-", ++patterns are read first. This is useful only when the standard input is a ++terminal, from which further lines (the list of files) can be read after an ++end-of-file indication. If this option is given more than once, all the ++specified files are read. + .TP + \fB--file-offsets\fP + Instead of showing lines or parts of lines that match, show each match as an +@@ -679,12 +691,13 @@ The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with + different newline conventions from the default. Any parts of the input files + that are written to the standard output are copied identically, with whatever + newline sequences they have in the input. However, the setting of this option +-does not affect the interpretation of files specified by the \fB-f\fP, +-\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use +-the operating system's standard newline sequence, nor does it affect the way in +-which \fBpcre2grep\fP writes informational messages to the standard error and +-output streams. For these it uses the string "\en" to indicate newlines, +-relying on the C I/O library to convert this to an appropriate sequence. ++affects only the way scanned files are processed. It does not affect the ++interpretation of files specified by the \fB-f\fP, \fB--file-list\fP, ++\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the ++way in which \fBpcre2grep\fP writes informational messages to the standard ++error and output streams. For these it uses the string "\en" to indicate ++newlines, relying on the C I/O library to convert this to an appropriate ++sequence. + . + . + .SH "OPTIONS COMPATIBILITY" +@@ -862,6 +875,6 @@ Cambridge, England. + .rs + .sp + .nf +-Last updated: 13 November 2017 +-Copyright (c) 1997-2017 University of Cambridge. ++Last updated: 24 February 2018 ++Copyright (c) 1997-2018 University of Cambridge. + .fi +diff --git a/src/pcre2grep.c b/src/pcre2grep.c +index 02339f5..78121ad 100644 +--- a/src/pcre2grep.c ++++ b/src/pcre2grep.c +@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. + The header can be found in the special z/OS distribution, which is available + from www.zaconsultants.net or from www.cbttape.org. + +- Copyright (c) 1997-2017 University of Cambridge ++ Copyright (c) 1997-2018 University of Cambridge + + ----------------------------------------------------------------------------- + Redistribution and use in source and binary forms, with or without +@@ -303,6 +303,7 @@ also for include/exclude patterns. */ + typedef struct patstr { + struct patstr *next; + char *string; ++ PCRE2_SIZE length; + pcre2_code *compiled; + } patstr; + +@@ -557,13 +558,14 @@ exit(rc); + + Arguments: + s pattern string to add ++ patlen length of pattern + after if not NULL points to item to insert after + + Returns: new pattern block or NULL on error + */ + + static patstr * +-add_pattern(char *s, patstr *after) ++add_pattern(char *s, PCRE2_SIZE patlen, patstr *after) + { + patstr *p = (patstr *)malloc(sizeof(patstr)); + if (p == NULL) +@@ -571,7 +573,7 @@ if (p == NULL) + fprintf(stderr, "pcre2grep: malloc failed\n"); + pcre2grep_exit(2); + } +-if (strlen(s) > MAXPATLEN) ++if (patlen > MAXPATLEN) + { + fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", + MAXPATLEN); +@@ -580,6 +582,7 @@ if (strlen(s) > MAXPATLEN) + } + p->next = NULL; + p->string = s; ++p->length = patlen; + p->compiled = NULL; + + if (after != NULL) +@@ -1276,12 +1279,14 @@ return om; + * Read one line of input * + *************************************************/ + +-/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large +-buffer, so many lines may be read at once. However, doing this for tty input +-means that no output appears until a lot of input has been typed. Instead, tty +-input is handled line by line. We cannot use fgets() for this, because it does +-not stop at a binary zero, and therefore there is no way of telling how many +-characters it has read, because there may be binary zeros embedded in the data. ++/* Normally, input that is to be scanned is read using fread() (or gzread, or ++BZ2_read) into a large buffer, so many lines may be read at once. However, ++doing this for tty input means that no output appears until a lot of input has ++been typed. Instead, tty input is handled line by line. We cannot use fgets() ++for this, because it does not stop at a binary zero, and therefore there is no ++way of telling how many characters it has read, because there may be binary ++zeros embedded in the data. This function is also used for reading patterns ++from files (the -f option). + + Arguments: + buffer the buffer to read into +@@ -1291,7 +1296,7 @@ Arguments: + Returns: the number of characters read, zero at end of file + */ + +-static unsigned int ++static PCRE2_SIZE + read_one_line(char *buffer, int length, FILE *f) + { + int c; +@@ -1651,11 +1656,11 @@ Returns: TRUE if there was a match + */ + + static BOOL +-match_patterns(char *matchptr, size_t length, unsigned int options, +- size_t startoffset, int *mrc) ++match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options, ++ PCRE2_SIZE startoffset, int *mrc) + { + int i; +-size_t slen = length; ++PCRE2_SIZE slen = length; + patstr *p = patterns; + const char *msg = "this text:\n\n"; + +@@ -2317,7 +2322,7 @@ unsigned long int count = 0; + char *lastmatchrestart = NULL; + char *ptr = main_buffer; + char *endptr; +-size_t bufflength; ++PCRE2_SIZE bufflength; + BOOL binary = FALSE; + BOOL endhyphenpending = FALSE; + BOOL input_line_buffered = line_buffered; +@@ -2339,7 +2344,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, + input_line_buffered); + + #ifdef SUPPORT_LIBBZ2 +-if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ ++if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */ + #endif + + endptr = main_buffer + bufflength; +@@ -2368,8 +2373,8 @@ while (ptr < endptr) + unsigned int options = 0; + BOOL match; + char *t = ptr; +- size_t length, linelength; +- size_t startoffset = 0; ++ PCRE2_SIZE length, linelength; ++ PCRE2_SIZE startoffset = 0; + + /* At this point, ptr is at the start of a line. We need to find the length + of the subject string to pass to pcre2_match(). In multiline mode, it is the +@@ -2381,7 +2386,7 @@ while (ptr < endptr) + + t = end_of_line(t, endptr, &endlinelength); + linelength = t - ptr - endlinelength; +- length = multiline? (size_t)(endptr - ptr) : linelength; ++ length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength; + + /* Check to see if the line we are looking at extends right to the very end + of the buffer without a line terminator. This means the line is too long to +@@ -2560,7 +2565,7 @@ while (ptr < endptr) + { + if (!invert) + { +- size_t oldstartoffset; ++ PCRE2_SIZE oldstartoffset; + + if (printname != NULL) fprintf(stdout, "%s:", printname); + if (number) fprintf(stdout, "%lu:", linenumber); +@@ -2647,7 +2652,7 @@ while (ptr < endptr) + startoffset -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; +- length = (size_t)(endptr - ptr); ++ length = (PCRE2_SIZE)(endptr - ptr); + } + + goto ONLY_MATCHING_RESTART; +@@ -2812,7 +2817,7 @@ while (ptr < endptr) + endprevious -= (int)(linelength + endlinelength); + t = end_of_line(ptr, endptr, &endlinelength); + linelength = t - ptr - endlinelength; +- length = (size_t)(endptr - ptr); ++ length = (PCRE2_SIZE)(endptr - ptr); + } + + /* If startoffset is at the exact end of the line it means this +@@ -2895,7 +2900,7 @@ while (ptr < endptr) + /* If input is line buffered, and the buffer is not yet full, read another + line and add it into the buffer. */ + +- if (input_line_buffered && bufflength < (size_t)bufsize) ++ if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) + { + int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); + bufflength += add; +@@ -2907,7 +2912,7 @@ while (ptr < endptr) + 1/3 and refill it. Before we do this, if some unprinted "after" lines are + about to be lost, print them. */ + +- if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird) ++ if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird) + { + if (after_context > 0 && + lastmatchnumber > 0 && +@@ -3395,9 +3400,8 @@ PCRE2_SIZE patlen, erroffset; + PCRE2_UCHAR errmessbuffer[ERRBUFSIZ]; + + if (p->compiled != NULL) return TRUE; +- + ps = p->string; +-patlen = strlen(ps); ++patlen = p->length; + + if ((options & PCRE2_LITERAL) != 0) + { +@@ -3407,8 +3411,8 @@ if ((options & PCRE2_LITERAL) != 0) + + if (ellength != 0) + { +- if (add_pattern(pe, p) == NULL) return FALSE; +- patlen = (int)(pe - ps - ellength); ++ patlen = pe - ps - ellength; ++ if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE; + } + } + +@@ -3470,6 +3474,7 @@ static BOOL + read_pattern_file(char *name, patstr **patptr, patstr **patlastptr) + { + int linenumber = 0; ++PCRE2_SIZE patlen; + FILE *f; + const char *filename; + char buffer[MAXPATLEN+20]; +@@ -3490,20 +3495,18 @@ else + filename = name; + } + +-while (fgets(buffer, sizeof(buffer), f) != NULL) ++while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) + { +- char *s = buffer + (int)strlen(buffer); +- while (s > buffer && isspace((unsigned char)(s[-1]))) s--; +- *s = 0; ++ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; + linenumber++; +- if (buffer[0] == 0) continue; /* Skip blank lines */ ++ if (patlen == 0) continue; /* Skip blank lines */ + + /* Note: this call to add_pattern() puts a pointer to the local variable + "buffer" into the pattern chain. However, that pointer is used only when + compiling the pattern, which happens immediately below, so we flatten it + afterwards, as a precaution against any later code trying to use it. */ + +- *patlastptr = add_pattern(buffer, *patlastptr); ++ *patlastptr = add_pattern(buffer, patlen, *patlastptr); + if (*patlastptr == NULL) + { + if (f != stdin) fclose(f); +@@ -3513,8 +3516,9 @@ while (fgets(buffer, sizeof(buffer), f) != NULL) + + /* This loop is needed because compiling a "pattern" when -F is set may add + on additional literal patterns if the original contains a newline. In the +- common case, it never will, because fgets() stops at a newline. However, +- the -N option can be used to give pcre2grep a different newline setting. */ ++ common case, it never will, because read_one_line() stops at a newline. ++ However, the -N option can be used to give pcre2grep a different newline ++ setting. */ + + for(;;) + { +@@ -3833,7 +3837,8 @@ for (i = 1; i < argc; i++) + else if (op->type == OP_PATLIST) + { + patdatastr *pd = (patdatastr *)op->dataptr; +- *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr)); ++ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), ++ *(pd->lastptr)); + if (*(pd->lastptr) == NULL) goto EXIT2; + if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); + } +@@ -4095,7 +4100,9 @@ the first argument is the one and only pattern, and it must exist. */ + if (patterns == NULL && pattern_files == NULL) + { + if (i >= argc) return usage(2); +- patterns = patterns_last = add_pattern(argv[i++], NULL); ++ patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]), ++ NULL); ++ i++; + if (patterns == NULL) goto EXIT2; + } + +diff --git a/testdata/grepoutput b/testdata/grepoutput +index e49c2b2..9329248 100644 +--- a/testdata/grepoutput ++++ b/testdata/grepoutput +@@ -945,3 +945,6 @@ RC=0 + RC=0 + abcd + RC=0 ++---------------------------- Test 126 ----------------------------- ++ABCXYZ ++RC=0 +-- +2.13.6 + diff --git a/pcre2.spec b/pcre2.spec index ba9b37d..f362b2b 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -9,7 +9,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.31 -Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}3%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -60,6 +60,9 @@ Patch3: pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch # Fix matching repeated character classes against an 8-bit string containting # multi-code-unit characters, in upstream after 10.31 Patch4: pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch +# Add support to pcre2grep for binary zeros in -f files, upstream bug #2222, +# in upstream after 10.31 +Patch5: pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -140,6 +143,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch2 -p1 %patch3 -p1 %patch4 -p1 +%patch5 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -242,6 +246,9 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Mon Feb 26 2018 Petr Pisar - 10.31-3 +- Add support to pcre2grep for binary zeros in -f files (upstream bug #2222) + * Tue Feb 20 2018 Petr Pisar - 10.31-2 - Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero starting offset (upstream bug #2244)