From 7002d51cb1b4bb4c9c00967b9b724e82a80c75e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Thu, 16 Aug 2018 11:58:51 +0200 Subject: [PATCH] 10.32-RC1 bump --- .gitignore | 1 + ...cre2grep-to-avoid-compiler-warnings-.patch | 91 --- ...cre2grep-for-binary-zeros-in-f-files.patch | 417 ----------- ...epeated-character-classes-in-UTF-8-m.patch | 120 --- ...31-Fix-bug-in-VERSION-number-reading.patch | 73 -- ...s-used-in-a-lookbehind-in-a-substitu.patch | 179 ----- ....31-Fix-dynamic-options-changing-bug.patch | 139 ---- ...h-replace-in-pcre2test-and-pcre2_sub.patch | 708 ------------------ ...t-C-to-correctly-show-what-R-matches.patch | 30 - ...ssed-back-for-POSIX-unset-groups-whe.patch | 133 ---- ...-Fixed-atomic-group-backtracking-bug.patch | 67 -- ...s-on-lookaheads-within-lookbehinds-w.patch | 133 ---- ...-compatible-by-recognizing-all-of-Un.patch | 174 ----- ...forgot-about-C-bsr-in-previous-patch.patch | 29 - ...-zero-for-early-errors-in-pcre2_patt.patch | 65 -- pcre2.spec | 64 +- sources | 2 +- 17 files changed, 8 insertions(+), 2417 deletions(-) delete mode 100644 pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch delete mode 100644 pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch delete mode 100644 pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch delete mode 100644 pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch delete mode 100644 pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch delete mode 100644 pcre2-10.31-Fix-dynamic-options-changing-bug.patch delete mode 100644 pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch delete mode 100644 pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch delete mode 100644 pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch delete mode 100644 pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch delete mode 100644 pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch delete mode 100644 pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch delete mode 100644 pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch delete mode 100644 pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch diff --git a/.gitignore b/.gitignore index 5e8d14e..1db9300 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ /pcre2-10.30.tar.bz2 /pcre2-10.31-RC1.tar.bz2 /pcre2-10.31.tar.bz2 +/pcre2-10.32-RC1.tar.bz2 diff --git a/pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch b/pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch deleted file mode 100644 index 05ab874..0000000 --- a/pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch +++ /dev/null @@ -1,91 +0,0 @@ -From ecf1a253d8b7c41f8700eb78e598bfddfeb97215 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sun, 25 Feb 2018 12:12:48 +0000 -Subject: [PATCH] A small fix to pcre2grep to avoid compiler warnings for - -Wformat-overflow=2. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@921 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. - -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 78121ad..a9379cf 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -303,7 +303,7 @@ also for include/exclude patterns. */ - typedef struct patstr { - struct patstr *next; - char *string; -- PCRE2_SIZE length; -+ PCRE2_SIZE length; - pcre2_code *compiled; - } patstr; - -@@ -558,7 +558,7 @@ exit(rc); - - Arguments: - s pattern string to add -- patlen length of pattern -+ patlen length of pattern - after if not NULL points to item to insert after - - Returns: new pattern block or NULL on error -@@ -1285,7 +1285,7 @@ doing this for tty input means that no output appears until a lot of input has - been typed. Instead, tty input is handled line by line. We cannot use fgets() - for this, because it does not stop at a binary zero, and therefore there is no - way of telling how many characters it has read, because there may be binary --zeros embedded in the data. This function is also used for reading patterns -+zeros embedded in the data. This function is also used for reading patterns - from files (the -f option). - - Arguments: -@@ -3497,7 +3497,7 @@ else - - while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) - { -- while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; -+ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; - linenumber++; - if (patlen == 0) continue; /* Skip blank lines */ - -@@ -3669,8 +3669,15 @@ for (i = 1; i < argc; i++) - int arglen = (argequals == NULL || equals == NULL)? - (int)strlen(arg) : (int)(argequals - arg); - -- sprintf(buff1, "%.*s", baselen, op->long_name); -- sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); -+ if (snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name) > -+ (int)sizeof(buff1) || -+ snprintf(buff2, sizeof(buff2), "%s%.*s", buff1, -+ fulllen - baselen - 2, opbra + 1) > (int)sizeof(buff2)) -+ { -+ fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n", -+ op->long_name); -+ pcre2grep_exit(2); -+ } - - if (strncmp(arg, buff1, arglen) == 0 || - strncmp(arg, buff2, arglen) == 0) -@@ -3837,7 +3844,7 @@ for (i = 1; i < argc; i++) - else if (op->type == OP_PATLIST) - { - patdatastr *pd = (patdatastr *)op->dataptr; -- *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), -+ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), - *(pd->lastptr)); - if (*(pd->lastptr) == NULL) goto EXIT2; - if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); -@@ -4102,7 +4109,7 @@ if (patterns == NULL && pattern_files == NULL) - if (i >= argc) return usage(2); - patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]), - NULL); -- i++; -+ i++; - if (patterns == NULL) goto EXIT2; - } - --- -2.13.6 - diff --git a/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch b/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch deleted file mode 100644 index bae2131..0000000 --- a/pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch +++ /dev/null @@ -1,417 +0,0 @@ -From d59c555dcc96b23d0481f901ba617db91b9b2a9a Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sat, 24 Feb 2018 17:09:19 +0000 -Subject: [PATCH] Add support to pcre2grep for binary zeros in -f files. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@920 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - RunGrepTest | 6 ++++ - doc/pcre2grep.1 | 59 +++++++++++++++++++++++--------------- - src/pcre2grep.c | 81 +++++++++++++++++++++++++++++------------------------ - testdata/grepoutput | 3 ++ - -diff --git a/RunGrepTest b/RunGrepTest -index a26f677..293e5a5 100755 ---- a/RunGrepTest -+++ b/RunGrepTest -@@ -641,6 +641,12 @@ echo "RC=$?" >>testtrygrep - $valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep - echo "RC=$?" >>testtrygrep - -+echo "---------------------------- Test 126 -----------------------------" >>testtrygrep -+printf "Next line pattern has binary zero\nABC\x00XYZ\n" >testtemp1grep -+printf "ABC\x00XYZ\nABCDEF\nDEFABC\n" >testtemp2grep -+$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep -+echo "RC=$?" >>testtrygrep -+ - - # Now compare the results. - -diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 -index 5e5cbea..ba6aea6 100644 ---- a/doc/pcre2grep.1 -+++ b/doc/pcre2grep.1 -@@ -1,4 +1,4 @@ --.TH PCRE2GREP 1 "13 November 2017" "PCRE2 10.31" -+.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32" - .SH NAME - pcre2grep - a grep with Perl-compatible regular expressions. - .SH SYNOPSIS -@@ -121,6 +121,14 @@ a binary file is not applied. See the \fB--binary-files\fP option for a means - of changing the way binary files are handled. - . - . -+.SH "BINARY ZEROS IN PATTERNS" -+.rs -+.sp -+Patterns passed from the command line are strings that are terminated by a -+binary zero, so cannot contain internal zeros. However, patterns that are read -+from a file via the \fB-f\fP option may contain binary zeros. -+. -+. - .SH OPTIONS - .rs - .sp -@@ -304,12 +312,15 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or - .TP - \fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP - Read patterns from the file, one per line, and match them against each line of --input. What constitutes a newline when reading the file is the operating --system's default. The \fB--newline\fP option has no effect on this option. --Trailing white space is removed from each line, and blank lines are ignored. An --empty file contains no patterns and therefore matches nothing. See also the --comments about multiple patterns versus a single pattern with alternatives in --the description of \fB-e\fP above. -+input. As is the case with patterns on the command line, no delimiters should -+be used. What constitutes a newline when reading the file is the operating -+system's default interpretation of \en. The \fB--newline\fP option has no -+effect on this option. Trailing white space is removed from each line, and -+blank lines are ignored. An empty file contains no patterns and therefore -+matches nothing. Patterns read from a file in this way may contain binary -+zeros, which are treated as ordinary data characters. See also the comments -+about multiple patterns versus a single pattern with alternatives in the -+description of \fB-e\fP above. - .sp - If this option is given more than once, all the specified files are read. A - data line is output if any of the patterns match it. A file name can be given -@@ -320,14 +331,15 @@ command line; all arguments are treated as the names of paths to be searched. - .TP - \fB--file-list\fP=\fIfilename\fP - Read a list of files and/or directories that are to be scanned from the given --file, one per line. Trailing white space is removed from each line, and blank --lines are ignored. These paths are processed before any that are listed on the --command line. The file name can be given as "-" to refer to the standard input. --If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are --read first. This is useful only when the standard input is a terminal, from --which further lines (the list of files) can be read after an end-of-file --indication. If this option is given more than once, all the specified files are --read. -+file, one per line. What constitutes a newline when reading the file is the -+operating system's default. Trailing white space is removed from each line, and -+blank lines are ignored. These paths are processed before any that are listed -+on the command line. The file name can be given as "-" to refer to the standard -+input. If \fB--file\fP and \fB--file-list\fP are both specified as "-", -+patterns are read first. This is useful only when the standard input is a -+terminal, from which further lines (the list of files) can be read after an -+end-of-file indication. If this option is given more than once, all the -+specified files are read. - .TP - \fB--file-offsets\fP - Instead of showing lines or parts of lines that match, show each match as an -@@ -679,12 +691,13 @@ The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with - different newline conventions from the default. Any parts of the input files - that are written to the standard output are copied identically, with whatever - newline sequences they have in the input. However, the setting of this option --does not affect the interpretation of files specified by the \fB-f\fP, --\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use --the operating system's standard newline sequence, nor does it affect the way in --which \fBpcre2grep\fP writes informational messages to the standard error and --output streams. For these it uses the string "\en" to indicate newlines, --relying on the C I/O library to convert this to an appropriate sequence. -+affects only the way scanned files are processed. It does not affect the -+interpretation of files specified by the \fB-f\fP, \fB--file-list\fP, -+\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the -+way in which \fBpcre2grep\fP writes informational messages to the standard -+error and output streams. For these it uses the string "\en" to indicate -+newlines, relying on the C I/O library to convert this to an appropriate -+sequence. - . - . - .SH "OPTIONS COMPATIBILITY" -@@ -862,6 +875,6 @@ Cambridge, England. - .rs - .sp - .nf --Last updated: 13 November 2017 --Copyright (c) 1997-2017 University of Cambridge. -+Last updated: 24 February 2018 -+Copyright (c) 1997-2018 University of Cambridge. - .fi -diff --git a/src/pcre2grep.c b/src/pcre2grep.c -index 02339f5..78121ad 100644 ---- a/src/pcre2grep.c -+++ b/src/pcre2grep.c -@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS. - The header can be found in the special z/OS distribution, which is available - from www.zaconsultants.net or from www.cbttape.org. - -- Copyright (c) 1997-2017 University of Cambridge -+ Copyright (c) 1997-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -303,6 +303,7 @@ also for include/exclude patterns. */ - typedef struct patstr { - struct patstr *next; - char *string; -+ PCRE2_SIZE length; - pcre2_code *compiled; - } patstr; - -@@ -557,13 +558,14 @@ exit(rc); - - Arguments: - s pattern string to add -+ patlen length of pattern - after if not NULL points to item to insert after - - Returns: new pattern block or NULL on error - */ - - static patstr * --add_pattern(char *s, patstr *after) -+add_pattern(char *s, PCRE2_SIZE patlen, patstr *after) - { - patstr *p = (patstr *)malloc(sizeof(patstr)); - if (p == NULL) -@@ -571,7 +573,7 @@ if (p == NULL) - fprintf(stderr, "pcre2grep: malloc failed\n"); - pcre2grep_exit(2); - } --if (strlen(s) > MAXPATLEN) -+if (patlen > MAXPATLEN) - { - fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", - MAXPATLEN); -@@ -580,6 +582,7 @@ if (strlen(s) > MAXPATLEN) - } - p->next = NULL; - p->string = s; -+p->length = patlen; - p->compiled = NULL; - - if (after != NULL) -@@ -1276,12 +1279,14 @@ return om; - * Read one line of input * - *************************************************/ - --/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large --buffer, so many lines may be read at once. However, doing this for tty input --means that no output appears until a lot of input has been typed. Instead, tty --input is handled line by line. We cannot use fgets() for this, because it does --not stop at a binary zero, and therefore there is no way of telling how many --characters it has read, because there may be binary zeros embedded in the data. -+/* Normally, input that is to be scanned is read using fread() (or gzread, or -+BZ2_read) into a large buffer, so many lines may be read at once. However, -+doing this for tty input means that no output appears until a lot of input has -+been typed. Instead, tty input is handled line by line. We cannot use fgets() -+for this, because it does not stop at a binary zero, and therefore there is no -+way of telling how many characters it has read, because there may be binary -+zeros embedded in the data. This function is also used for reading patterns -+from files (the -f option). - - Arguments: - buffer the buffer to read into -@@ -1291,7 +1296,7 @@ Arguments: - Returns: the number of characters read, zero at end of file - */ - --static unsigned int -+static PCRE2_SIZE - read_one_line(char *buffer, int length, FILE *f) - { - int c; -@@ -1651,11 +1656,11 @@ Returns: TRUE if there was a match - */ - - static BOOL --match_patterns(char *matchptr, size_t length, unsigned int options, -- size_t startoffset, int *mrc) -+match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options, -+ PCRE2_SIZE startoffset, int *mrc) - { - int i; --size_t slen = length; -+PCRE2_SIZE slen = length; - patstr *p = patterns; - const char *msg = "this text:\n\n"; - -@@ -2317,7 +2322,7 @@ unsigned long int count = 0; - char *lastmatchrestart = NULL; - char *ptr = main_buffer; - char *endptr; --size_t bufflength; -+PCRE2_SIZE bufflength; - BOOL binary = FALSE; - BOOL endhyphenpending = FALSE; - BOOL input_line_buffered = line_buffered; -@@ -2339,7 +2344,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, - input_line_buffered); - - #ifdef SUPPORT_LIBBZ2 --if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ -+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */ - #endif - - endptr = main_buffer + bufflength; -@@ -2368,8 +2373,8 @@ while (ptr < endptr) - unsigned int options = 0; - BOOL match; - char *t = ptr; -- size_t length, linelength; -- size_t startoffset = 0; -+ PCRE2_SIZE length, linelength; -+ PCRE2_SIZE startoffset = 0; - - /* At this point, ptr is at the start of a line. We need to find the length - of the subject string to pass to pcre2_match(). In multiline mode, it is the -@@ -2381,7 +2386,7 @@ while (ptr < endptr) - - t = end_of_line(t, endptr, &endlinelength); - linelength = t - ptr - endlinelength; -- length = multiline? (size_t)(endptr - ptr) : linelength; -+ length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength; - - /* Check to see if the line we are looking at extends right to the very end - of the buffer without a line terminator. This means the line is too long to -@@ -2560,7 +2565,7 @@ while (ptr < endptr) - { - if (!invert) - { -- size_t oldstartoffset; -+ PCRE2_SIZE oldstartoffset; - - if (printname != NULL) fprintf(stdout, "%s:", printname); - if (number) fprintf(stdout, "%lu:", linenumber); -@@ -2647,7 +2652,7 @@ while (ptr < endptr) - startoffset -= (int)(linelength + endlinelength); - t = end_of_line(ptr, endptr, &endlinelength); - linelength = t - ptr - endlinelength; -- length = (size_t)(endptr - ptr); -+ length = (PCRE2_SIZE)(endptr - ptr); - } - - goto ONLY_MATCHING_RESTART; -@@ -2812,7 +2817,7 @@ while (ptr < endptr) - endprevious -= (int)(linelength + endlinelength); - t = end_of_line(ptr, endptr, &endlinelength); - linelength = t - ptr - endlinelength; -- length = (size_t)(endptr - ptr); -+ length = (PCRE2_SIZE)(endptr - ptr); - } - - /* If startoffset is at the exact end of the line it means this -@@ -2895,7 +2900,7 @@ while (ptr < endptr) - /* If input is line buffered, and the buffer is not yet full, read another - line and add it into the buffer. */ - -- if (input_line_buffered && bufflength < (size_t)bufsize) -+ if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) - { - int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); - bufflength += add; -@@ -2907,7 +2912,7 @@ while (ptr < endptr) - 1/3 and refill it. Before we do this, if some unprinted "after" lines are - about to be lost, print them. */ - -- if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird) -+ if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird) - { - if (after_context > 0 && - lastmatchnumber > 0 && -@@ -3395,9 +3400,8 @@ PCRE2_SIZE patlen, erroffset; - PCRE2_UCHAR errmessbuffer[ERRBUFSIZ]; - - if (p->compiled != NULL) return TRUE; -- - ps = p->string; --patlen = strlen(ps); -+patlen = p->length; - - if ((options & PCRE2_LITERAL) != 0) - { -@@ -3407,8 +3411,8 @@ if ((options & PCRE2_LITERAL) != 0) - - if (ellength != 0) - { -- if (add_pattern(pe, p) == NULL) return FALSE; -- patlen = (int)(pe - ps - ellength); -+ patlen = pe - ps - ellength; -+ if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE; - } - } - -@@ -3470,6 +3474,7 @@ static BOOL - read_pattern_file(char *name, patstr **patptr, patstr **patlastptr) - { - int linenumber = 0; -+PCRE2_SIZE patlen; - FILE *f; - const char *filename; - char buffer[MAXPATLEN+20]; -@@ -3490,20 +3495,18 @@ else - filename = name; - } - --while (fgets(buffer, sizeof(buffer), f) != NULL) -+while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0) - { -- char *s = buffer + (int)strlen(buffer); -- while (s > buffer && isspace((unsigned char)(s[-1]))) s--; -- *s = 0; -+ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--; - linenumber++; -- if (buffer[0] == 0) continue; /* Skip blank lines */ -+ if (patlen == 0) continue; /* Skip blank lines */ - - /* Note: this call to add_pattern() puts a pointer to the local variable - "buffer" into the pattern chain. However, that pointer is used only when - compiling the pattern, which happens immediately below, so we flatten it - afterwards, as a precaution against any later code trying to use it. */ - -- *patlastptr = add_pattern(buffer, *patlastptr); -+ *patlastptr = add_pattern(buffer, patlen, *patlastptr); - if (*patlastptr == NULL) - { - if (f != stdin) fclose(f); -@@ -3513,8 +3516,9 @@ while (fgets(buffer, sizeof(buffer), f) != NULL) - - /* This loop is needed because compiling a "pattern" when -F is set may add - on additional literal patterns if the original contains a newline. In the -- common case, it never will, because fgets() stops at a newline. However, -- the -N option can be used to give pcre2grep a different newline setting. */ -+ common case, it never will, because read_one_line() stops at a newline. -+ However, the -N option can be used to give pcre2grep a different newline -+ setting. */ - - for(;;) - { -@@ -3833,7 +3837,8 @@ for (i = 1; i < argc; i++) - else if (op->type == OP_PATLIST) - { - patdatastr *pd = (patdatastr *)op->dataptr; -- *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr)); -+ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data), -+ *(pd->lastptr)); - if (*(pd->lastptr) == NULL) goto EXIT2; - if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr); - } -@@ -4095,7 +4100,9 @@ the first argument is the one and only pattern, and it must exist. */ - if (patterns == NULL && pattern_files == NULL) - { - if (i >= argc) return usage(2); -- patterns = patterns_last = add_pattern(argv[i++], NULL); -+ patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]), -+ NULL); -+ i++; - if (patterns == NULL) goto EXIT2; - } - -diff --git a/testdata/grepoutput b/testdata/grepoutput -index e49c2b2..9329248 100644 ---- a/testdata/grepoutput -+++ b/testdata/grepoutput -@@ -945,3 +945,6 @@ RC=0 - RC=0 - abcd - RC=0 -+---------------------------- Test 126 ----------------------------- -+ABCXYZ -+RC=0 --- -2.13.6 - diff --git a/pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch b/pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch deleted file mode 100644 index b920614..0000000 --- a/pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch +++ /dev/null @@ -1,120 +0,0 @@ -From ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 19 Feb 2018 17:26:33 +0000 -Subject: [PATCH] Fix \C bug with repeated character classes in UTF-8 mode. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2_match.c | 16 ++++++++++++---- - testdata/testinput22 | 3 +++ - testdata/testoutput22-16 | 4 ++++ - testdata/testoutput22-32 | 4 ++++ - testdata/testoutput22-8 | 4 ++++ - -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index 79cc93f..ce96016 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); - - if (reptype == REPTYPE_POS) continue; /* No backtracking */ - -+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a -+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't -+ go too far. */ -+ - for (;;) - { - RMATCH(Fecode, RM201); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); -- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ -+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ - BACKCHAR(Feptr); - } - } -@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); - - if (reptype == REPTYPE_POS) continue; /* No backtracking */ - -+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a -+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't -+ go too far. */ -+ - for(;;) - { - RMATCH(Fecode, RM101); - if (rrc != MATCH_NOMATCH) RRETURN(rrc); -- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */ -+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */ - #ifdef SUPPORT_UNICODE - if (utf) BACKCHAR(Feptr); - #endif -@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); - if (reptype == REPTYPE_POS) continue; /* No backtracking */ - - /* After \C in UTF mode, Lstart_eptr might be in the middle of a -- Unicode character. Use <= pp to ensure backtracking doesn't go too far. -- */ -+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't -+ go too far. */ - - for(;;) - { -diff --git a/testdata/testinput22 b/testdata/testinput22 -index e6d4053..c218ea6 100644 ---- a/testdata/testinput22 -+++ b/testdata/testinput22 -@@ -98,4 +98,7 @@ - \= Expect no match - tests \C at end of subject - ab - -+/\C[^\v]+\x80/utf -+ [AΏBŀC] -+ - # End of testinput22 -diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16 -index 88f827c..5e23611 100644 ---- a/testdata/testoutput22-16 -+++ b/testdata/testoutput22-16 -@@ -171,4 +171,8 @@ No match - ab - No match - -+/\C[^\v]+\x80/utf -+ [AΏBŀC] -+No match -+ - # End of testinput22 -diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32 -index ac485fc..8576f31 100644 ---- a/testdata/testoutput22-32 -+++ b/testdata/testoutput22-32 -@@ -169,4 +169,8 @@ No match - ab - No match - -+/\C[^\v]+\x80/utf -+ [AΏBŀC] -+No match -+ - # End of testinput22 -diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8 -index 3d31fbc..8543652 100644 ---- a/testdata/testoutput22-8 -+++ b/testdata/testoutput22-8 -@@ -173,4 +173,8 @@ No match - ab - No match - -+/\C[^\v]+\x80/utf -+ [AΏBŀC] -+No match -+ - # End of testinput22 --- -2.13.6 - diff --git a/pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch b/pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch deleted file mode 100644 index c73030a..0000000 --- a/pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch +++ /dev/null @@ -1,73 +0,0 @@ -From c75868f77eb2ce2ff277355afcd966e3179e65a8 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 2 Jul 2018 12:26:04 +0000 -Subject: [PATCH] Fix bug in VERSION number reading. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@957 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2_compile.c | 5 ++--- - testdata/testinput2 | 3 +++ - testdata/testoutput2 | 6 +++++- - 4 files changed, 13 insertions(+), 4 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 5a47f1d..1208012 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -3896,9 +3896,8 @@ while (ptr < ptrend) - if (*ptr == CHAR_DOT) - { - if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; -- if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode)) -- goto FAILED; -- if (minor < 10) minor *= 10; -+ minor = (*ptr++ - CHAR_0) * 10; -+ if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0; - if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) - goto BAD_VERSION_CONDITION; - } -diff --git a/testdata/testinput2 b/testdata/testinput2 -index cc4b59b..9b207ef 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4007,6 +4007,9 @@ - /(?(VERSION>=10.0)yes|no)/I - yesno - -+/(?(VERSION>=10.04)yes|no)/ -+ yesno -+ - /(?(VERSION=8)yes){3}/BI,aftertext - yesno - -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index aab0c94..124a8b6 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -13483,6 +13483,10 @@ Subject length lower bound = 2 - yesno - 0: yes - -+/(?(VERSION>=10.04)yes|no)/ -+ yesno -+ 0: yes -+ - /(?(VERSION=8)yes){3}/BI,aftertext - ------------------------------------------------------------------ - Bra -@@ -13537,7 +13541,7 @@ Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION con - Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition - - /(?(VERSION=10.101)yes|no)/ --Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition -+Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition - - /abcd/I - Capturing subpattern count = 0 --- -2.14.4 - diff --git a/pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch b/pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch deleted file mode 100644 index 84e7aa5..0000000 --- a/pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch +++ /dev/null @@ -1,179 +0,0 @@ -From 0efedaf8864d1caa8ed0e7f8fb0b50d5231cacfa Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Fri, 22 Jun 2018 16:29:56 +0000 -Subject: [PATCH] Fix bug when \K is used in a lookbehind in a substitute - pattern. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@948 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař : Ported to 10.31. - -Signed-off-by: Petr Písař ---- - doc/html/pcre2api.html | 14 ++++++++++++-- - doc/pcre2.txt | 14 ++++++++++++-- - doc/pcre2api.3 | 3 ++- - src/pcre2_error.c | 2 +- - src/pcre2_substitute.c | 6 +++--- - testdata/testinput2 | 3 +++ - testdata/testoutput2 | 6 +++++- - 7 files changed, 38 insertions(+), 10 deletions(-) - -diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html -index ba3b2ca..af904e6 100644 ---- a/doc/html/pcre2api.html -+++ b/doc/html/pcre2api.html -@@ -2549,7 +2549,7 @@ calls to pcre2_match() if you are making repeated calls to find other - matches in the same subject string. -

-

--WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid -+Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid - string as a subject, or an invalid value of startoffset, is undefined. - Your program may crash or loop indefinitely. -

-@@ -2756,6 +2756,15 @@ branch of the group, but it is not on the matching path. On the other hand,
- when this pattern fails to match "bx", the returned name is B.
- 

-

-+Warning: By default, certain start-of-match optimizations are used to -+give a fast "no match" result in some situations. For example, if the anchoring -+is removed from the pattern above, there is an initial check for the presence -+of "c" in the subject before running the matching engine. This check fails for -+"bx", causing a match failure without seeing any marks. You can disable the -+start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for -+pcre2_compile() or starting the pattern with (*NO_START_OPT). -+

-+

- After a successful match, a partial match, or one of the invalid UTF errors - (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can be - called. After a successful or partial match it returns the code unit offset of -@@ -3310,7 +3319,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE - (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket - not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group - substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before --it started, which can happen if \K is used in an assertion). -+it started or the match started earlier than the current position in the -+subject, which can happen if \K is used in an assertion). -

-

- As for all PCRE2 errors, a text message that describes the error can be -diff --git a/doc/pcre2.txt b/doc/pcre2.txt -index 79d94e3..e5b941f 100644 ---- a/doc/pcre2.txt -+++ b/doc/pcre2.txt -@@ -2498,7 +2498,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION - second and subsequent calls to pcre2_match() if you are making repeated - calls to find other matches in the same subject string. - -- WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an -+ Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an - invalid string as a subject, or an invalid value of startoffset, is - undefined. Your program may crash or loop indefinitely. - -@@ -2683,6 +2683,15 @@ OTHER INFORMATION ABOUT A MATCH - the other hand, when this pattern fails to match "bx", the returned - name is B. - -+ Warning: By default, certain start-of-match optimizations are used to -+ give a fast "no match" result in some situations. For example, if the -+ anchoring is removed from the pattern above, there is an initial check -+ for the presence of "c" in the subject before running the matching -+ engine. This check fails for "bx", causing a match failure without see- -+ ing any marks. You can disable the start-of-match optimizations by set- -+ ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or starting -+ the pattern with (*NO_START_OPT). -+ - After a successful match, a partial match, or one of the invalid UTF - errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can - be called. After a successful or partial match it returns the code unit -@@ -3209,7 +3218,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS - PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP- - MISSINGBRACE (closing curly bracket not found), PCRE2_ERROR_BADSUBSTI- - TUTION (syntax error in extended group substitution), and -- PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started, -+ PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started -+ or the match started earlier than the current position in the subject, - which can happen if \K is used in an assertion). - - As for all PCRE2 errors, a text message that describes the error can be -diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 -index 786b314..ac6e246 100644 ---- a/doc/pcre2api.3 -+++ b/doc/pcre2api.3 -@@ -3302,7 +3302,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE - (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket - not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group - substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before --it started, which can happen if \eK is used in an assertion). -+it started or the match started earlier than the current position in the -+subject, which can happen if \eK is used in an assertion). - .P - As for all PCRE2 errors, a text message that describes the error can be - obtained by calling the \fBpcre2_get_error_message()\fP function (see -diff --git a/src/pcre2_error.c b/src/pcre2_error.c -index d98cae9..a1f98d4 100644 ---- a/src/pcre2_error.c -+++ b/src/pcre2_error.c -@@ -255,7 +255,7 @@ static const unsigned char match_error_texts[] = - "expected closing curly bracket in replacement string\0" - "bad substitution in replacement string\0" - /* 60 */ -- "match with end before start is not supported\0" -+ "match with end before start or start moved backwards is not supported\0" - "too many replacements (more than INT_MAX)\0" - "bad serialized data\0" - "heap limit exceeded\0" -diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c -index 8da951f..955370a 100644 ---- a/src/pcre2_substitute.c -+++ b/src/pcre2_substitute.c -@@ -361,9 +361,9 @@ do - } - - /* Handle a successful match. Matches that use \K to end before they start -- are not supported. */ -- -- if (ovector[1] < ovector[0]) -+ or start before the current point in the subject are not supported. */ -+ -+ if (ovector[1] < ovector[0] || ovector[0] < start_offset) - { - rc = PCRE2_ERROR_BADSUBSPATTERN; - goto EXIT; -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 5d3a80e..3499042 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4643,6 +4643,9 @@ B)x/alt_verbnames,mark - - /(?=a\K)/replace=z - BaCaD -+ -+/(?<=\K.)/g,replace=- -+ ab - - /(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/ - -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index fcaac8f..f9e128d 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -14899,7 +14899,11 @@ Subject length lower bound = 1 - - /(?=a\K)/replace=z - BaCaD --Failed: error -60: match with end before start is not supported -+Failed: error -60: match with end before start or start moved backwards is not supported -+ -+/(?<=\K.)/g,replace=- -+ ab -+Failed: error -60: match with end before start or start moved backwards is not supported - - /(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/ - Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters) --- -2.14.4 - diff --git a/pcre2-10.31-Fix-dynamic-options-changing-bug.patch b/pcre2-10.31-Fix-dynamic-options-changing-bug.patch deleted file mode 100644 index 3173c75..0000000 --- a/pcre2-10.31-Fix-dynamic-options-changing-bug.patch +++ /dev/null @@ -1,139 +0,0 @@ -From 1247796cd3cffa4cfea368decfdbaf13b276bfe3 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sat, 4 Aug 2018 08:20:18 +0000 -Subject: [PATCH] Fix dynamic options changing bug. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@979 6239d852-aaf2-0410-a92c-79f79f948069 - -Petr Písař: Ported to 10.31. - -Signed-off-by: Petr Písař ---- - src/pcre2_compile.c | 29 +++++++++++++++++------------ - testdata/testinput1 | 5 +++++ - testdata/testoutput1 | 8 ++++++++ - 3 files changed, 30 insertions(+), 12 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 1d62a38..9898d06 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2251,11 +2251,14 @@ typedef struct nest_save { - #define NSF_RESET 0x0001u - #define NSF_CONDASSERT 0x0002u - --/* Of the options that are changeable within the pattern, these are tracked --during parsing. The rest are used from META_OPTIONS items when compiling. */ -+/* Options that are changeable within the pattern must be tracked during -+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing, -+but all must be tracked so that META_OPTIONS items set the correct values for -+the main compiling phase. */ - --#define PARSE_TRACKED_OPTIONS \ -- (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) -+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \ -+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \ -+ PCRE2_UNGREEDY) - - /* States used for analyzing ranges in character classes. The two OK values - must be last. */ -@@ -2434,16 +2437,16 @@ while (ptr < ptrend) - /* EITHER: not both options set */ - ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != - (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || --#ifdef SUPPORT_UNICODE -+#ifdef SUPPORT_UNICODE - /* OR: character > 255 AND not Unicode Pattern White Space */ - (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || --#endif -+#endif - /* OR: not a # comment or isspace() white space */ - (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 - #ifdef SUPPORT_UNICODE - /* and not CHAR_NEL when Unicode is supported */ - && c != CHAR_NEL --#endif -+#endif - ))) - { - PCRE2_SIZE verbnamelength; -@@ -2518,16 +2521,16 @@ while (ptr < ptrend) - character, not a code unit, so we must not use MAX_255 to test its size - because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The - whitespace characters are those designated as "Pattern White Space" by -- Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is -- U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a -+ Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is -+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a - subset of space characters that match \h and \v. */ - - if ((options & PCRE2_EXTENDED) != 0) - { - if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; --#ifdef SUPPORT_UNICODE -+#ifdef SUPPORT_UNICODE - if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; --#endif -+#endif - if (c == CHAR_NUMBER_SIGN) - { - while (ptr < ptrend) -@@ -3534,6 +3537,8 @@ while (ptr < ptrend) - - else - { -+ uint32_t oldoptions = options; -+ - top_nest->reset_group = 0; - top_nest->max_group = 0; - set = unset = 0; -@@ -3604,7 +3609,7 @@ while (ptr < ptrend) - - /* If nothing changed, no need to record. */ - -- if (set != 0 || unset != 0) -+ if (options != oldoptions) - { - *parsed_pattern++ = META_OPTIONS; - *parsed_pattern++ = options; -diff --git a/testdata/testinput1 b/testdata/testinput1 -index cc11288..5b9c4df 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -2184,6 +2184,11 @@ - Blah blah - blaH blah - -+/((?i)blah)\s+(?m)A(?i:\1)/ -+ blah ABLAH -+\= Expect no match -+ blah aBLAH -+ - /(?>a*)*/ - a - aa -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index 2fd2d48..f58076f 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -3346,6 +3346,14 @@ No match - 0: blaH blah - 1: blaH - -+/((?i)blah)\s+(?m)A(?i:\1)/ -+ blah ABLAH -+ 0: blah ABLAH -+ 1: blah -+\= Expect no match -+ blah aBLAH -+No match -+ - /(?>a*)*/ - a - 0: a --- -2.14.4 - diff --git a/pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch b/pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch deleted file mode 100644 index 69f190e..0000000 --- a/pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch +++ /dev/null @@ -1,708 +0,0 @@ -From 7729d10594572b5e5a3ebfa89064cc176ba50c7e Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 2 Jul 2018 10:54:03 +0000 -Subject: [PATCH] Fix global search/replace in pcre2test and pcre2_substitute() - when the pattern matches an empty string, but never at the starting offset. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@955 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. - -Signed-off-by: Petr Písař ---- - RunTest | 2 +- - doc/html/pcre2api.html | 5 +- - doc/html/pcre2pattern.html | 5 +- - doc/pcre2.txt | 175 ++++++++++++++++++++++++--------------------- - doc/pcre2api.3 | 5 +- - src/pcre2.h.in | 3 +- - src/pcre2_error.c | 4 +- - src/pcre2_substitute.c | 41 +++++++++-- - src/pcre2test.c | 77 ++++++++++++-------- - testdata/testinput1 | 3 + - testdata/testinput2 | 3 + - testdata/testoutput1 | 9 +++ - testdata/testoutput2 | 6 +- - 13 files changed, 214 insertions(+), 124 deletions(-) - -diff --git a/RunTest b/RunTest -index bc912da..f20f194 100755 ---- a/RunTest -+++ b/RunTest -@@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do - for opt in "" $jitopt; do - $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry - if [ $? = 0 ] ; then -- $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,200 >>testtry -+ $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry - checkresult $? 2 "$opt" - fi - done -diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html -index ba3b2ca..daa32a9 100644 ---- a/doc/html/pcre2api.html -+++ b/doc/html/pcre2api.html -@@ -3108,7 +3108,10 @@ string in outputbuffer, replacing the part that was matched with the - replacement string, whose length is supplied in rlength. This can - be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in - which a \K item in a lookahead in the pattern causes the match to end before --it starts are not supported, and give rise to an error return. -+it starts are not supported, and give rise to an error return. For global -+replacements, matches in which \K in a lookbehind causes the match to start -+earlier than the point that was reached in the previous iteration are also not -+supported. -

-

- The first seven arguments of pcre2_substitute() are the same as for -diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html -index c495cba..bc07e8b 100644 ---- a/doc/html/pcre2pattern.html -+++ b/doc/html/pcre2pattern.html -@@ -1082,8 +1082,9 @@ sequences but the characters that they represent.) - Resetting the match start -
-

--The escape sequence \K causes any previously matched characters not to be --included in the final matched sequence. For example, the pattern: -+In normal use, the escape sequence \K causes any previously matched characters -+not to be included in the final matched sequence that is returned. For example, -+the pattern: -

-   foo\Kbar
- 
-diff --git a/doc/pcre2.txt b/doc/pcre2.txt -index 79d94e3..a82f857 100644 ---- a/doc/pcre2.txt -+++ b/doc/pcre2.txt -@@ -3014,75 +3014,78 @@ CREATING A NEW STRING WITH SUBSTITUTIONS - replacement string, whose length is supplied in rlength. This can be - given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in - which a \K item in a lookahead in the pattern causes the match to end -- before it starts are not supported, and give rise to an error return. -+ before it starts are not supported, and give rise to an error return. -+ For global replacements, matches in which \K in a lookbehind causes the -+ match to start earlier than the point that was reached in the previous -+ iteration are also not supported. - -- The first seven arguments of pcre2_substitute() are the same as for -+ The first seven arguments of pcre2_substitute() are the same as for - pcre2_match(), except that the partial matching options are not permit- -- ted, and match_data may be passed as NULL, in which case a match data -- block is obtained and freed within this function, using memory manage- -- ment functions from the match context, if provided, or else those that -+ ted, and match_data may be passed as NULL, in which case a match data -+ block is obtained and freed within this function, using memory manage- -+ ment functions from the match context, if provided, or else those that - were used to allocate memory for the compiled code. - -- The outlengthptr argument must point to a variable that contains the -- length, in code units, of the output buffer. If the function is suc- -- cessful, the value is updated to contain the length of the new string, -+ The outlengthptr argument must point to a variable that contains the -+ length, in code units, of the output buffer. If the function is suc- -+ cessful, the value is updated to contain the length of the new string, - excluding the trailing zero that is automatically added. - -- If the function is not successful, the value set via outlengthptr -- depends on the type of error. For syntax errors in the replacement -- string, the value is the offset in the replacement string where the -- error was detected. For other errors, the value is PCRE2_UNSET by -- default. This includes the case of the output buffer being too small, -- unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which -- case the value is the minimum length needed, including space for the -- trailing zero. Note that in order to compute the required length, -- pcre2_substitute() has to simulate all the matching and copying, -+ If the function is not successful, the value set via outlengthptr -+ depends on the type of error. For syntax errors in the replacement -+ string, the value is the offset in the replacement string where the -+ error was detected. For other errors, the value is PCRE2_UNSET by -+ default. This includes the case of the output buffer being too small, -+ unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which -+ case the value is the minimum length needed, including space for the -+ trailing zero. Note that in order to compute the required length, -+ pcre2_substitute() has to simulate all the matching and copying, - instead of giving an error return as soon as the buffer overflows. Note - also that the length is in code units, not bytes. - -- In the replacement string, which is interpreted as a UTF string in UTF -- mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK -+ In the replacement string, which is interpreted as a UTF string in UTF -+ mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK - option is set, a dollar character is an escape character that can spec- -- ify the insertion of characters from capturing groups or (*MARK), -- (*PRUNE), or (*THEN) items in the pattern. The following forms are -+ ify the insertion of characters from capturing groups or (*MARK), -+ (*PRUNE), or (*THEN) items in the pattern. The following forms are - always recognized: - - $$ insert a dollar character - $ or ${} insert the contents of group - $*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name - -- Either a group number or a group name can be given for . Curly -- brackets are required only if the following character would be inter- -+ Either a group number or a group name can be given for . Curly -+ brackets are required only if the following character would be inter- - preted as part of the number or name. The number may be zero to include -- the entire matched string. For example, if the pattern a(b)c is -- matched with "=abc=" and the replacement string "+$1$0$1+", the result -+ the entire matched string. For example, if the pattern a(b)c is -+ matched with "=abc=" and the replacement string "+$1$0$1+", the result - is "=+babcb+=". - - $*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or -- (*THEN) on the matching path that has a name. (*MARK) must always -- include a name, but (*PRUNE) and (*THEN) need not. For example, in the -- case of (*MARK:A)(*PRUNE) the name inserted is "A", but for -- (*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be -- used to perform simple simultaneous substitutions, as this pcre2test -+ (*THEN) on the matching path that has a name. (*MARK) must always -+ include a name, but (*PRUNE) and (*THEN) need not. For example, in the -+ case of (*MARK:A)(*PRUNE) the name inserted is "A", but for -+ (*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be -+ used to perform simple simultaneous substitutions, as this pcre2test - example shows: - - /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} - apple lemon - 2: pear orange - -- As well as the usual options for pcre2_match(), a number of additional -+ As well as the usual options for pcre2_match(), a number of additional - options can be set in the options argument of pcre2_substitute(). - - PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject -- string, replacing every matching substring. If this option is not set, -- only the first matching substring is replaced. The search for matches -- takes place in the original subject string (that is, previous replace- -- ments do not affect it). Iteration is implemented by advancing the -- startoffset value for each search, which is always passed the entire -+ string, replacing every matching substring. If this option is not set, -+ only the first matching substring is replaced. The search for matches -+ takes place in the original subject string (that is, previous replace- -+ ments do not affect it). Iteration is implemented by advancing the -+ startoffset value for each search, which is always passed the entire - subject string. If an offset limit is set in the match context, search- - ing stops when that limit is reached. - -- You can restrict the effect of a global substitution to a portion of -+ You can restrict the effect of a global substitution to a portion of - the subject string by setting either or both of startoffset and an off- - set limit. Here is a pcre2test example: - -@@ -3090,87 +3093,87 @@ CREATING A NEW STRING WITH SUBSTITUTIONS - ABC ABC ABC ABC\=offset=3,offset_limit=12 - 2: ABC A!C A!C ABC - -- When continuing with global substitutions after matching a substring -+ When continuing with global substitutions after matching a substring - with zero length, an attempt to find a non-empty match at the same off- - set is performed. If this is not successful, the offset is advanced by - one character except when CRLF is a valid newline sequence and the next -- two characters are CR, LF. In this case, the offset is advanced by two -+ two characters are CR, LF. In this case, the offset is advanced by two - characters. - -- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output -+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output - buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- -- ORY immediately. If this option is set, however, pcre2_substitute() -+ ORY immediately. If this option is set, however, pcre2_substitute() - continues to go through the motions of matching and substituting (with- -- out, of course, writing anything) in order to compute the size of buf- -- fer that is needed. This value is passed back via the outlengthptr -- variable, with the result of the function still being -+ out, of course, writing anything) in order to compute the size of buf- -+ fer that is needed. This value is passed back via the outlengthptr -+ variable, with the result of the function still being - PCRE2_ERROR_NOMEMORY. - -- Passing a buffer size of zero is a permitted way of finding out how -- much memory is needed for given substitution. However, this does mean -+ Passing a buffer size of zero is a permitted way of finding out how -+ much memory is needed for given substitution. However, this does mean - that the entire operation is carried out twice. Depending on the appli- -- cation, it may be more efficient to allocate a large buffer and free -- the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- -+ cation, it may be more efficient to allocate a large buffer and free -+ the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- - FLOW_LENGTH. - -- PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups -- that do not appear in the pattern to be treated as unset groups. This -- option should be used with care, because it means that a typo in a -- group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING -+ PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups -+ that do not appear in the pattern to be treated as unset groups. This -+ option should be used with care, because it means that a typo in a -+ group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING - error. - -- PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including -+ PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including - unknown groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be -- treated as empty strings when inserted as described above. If this -- option is not set, an attempt to insert an unset group causes the -- PCRE2_ERROR_UNSET error. This option does not influence the extended -+ treated as empty strings when inserted as described above. If this -+ option is not set, an attempt to insert an unset group causes the -+ PCRE2_ERROR_UNSET error. This option does not influence the extended - substitution syntax described below. - -- PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the -- replacement string. Without this option, only the dollar character is -- special, and only the group insertion forms listed above are valid. -+ PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the -+ replacement string. Without this option, only the dollar character is -+ special, and only the group insertion forms listed above are valid. - When PCRE2_SUBSTITUTE_EXTENDED is set, two things change: - -- Firstly, backslash in a replacement string is interpreted as an escape -+ Firstly, backslash in a replacement string is interpreted as an escape - character. The usual forms such as \n or \x{ddd} can be used to specify -- particular character codes, and backslash followed by any non-alphanu- -- meric character quotes that character. Extended quoting can be coded -+ particular character codes, and backslash followed by any non-alphanu- -+ meric character quotes that character. Extended quoting can be coded - using \Q...\E, exactly as in pattern strings. - -- There are also four escape sequences for forcing the case of inserted -- letters. The insertion mechanism has three states: no case forcing, -+ There are also four escape sequences for forcing the case of inserted -+ letters. The insertion mechanism has three states: no case forcing, - force upper case, and force lower case. The escape sequences change the - current state: \U and \L change to upper or lower case forcing, respec- -- tively, and \E (when not terminating a \Q quoted sequence) reverts to -- no case forcing. The sequences \u and \l force the next character (if -- it is a letter) to upper or lower case, respectively, and then the -+ tively, and \E (when not terminating a \Q quoted sequence) reverts to -+ no case forcing. The sequences \u and \l force the next character (if -+ it is a letter) to upper or lower case, respectively, and then the - state automatically reverts to no case forcing. Case forcing applies to - all inserted characters, including those from captured groups and let- - ters within \Q...\E quoted sequences. - - Note that case forcing sequences such as \U...\E do not nest. For exam- -- ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final -+ ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final - \E has no effect. - -- The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more -- flexibility to group substitution. The syntax is similar to that used -+ The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more -+ flexibility to group substitution. The syntax is similar to that used - by Bash: - - ${:-} - ${:+:} - -- As before, may be a group number or a name. The first form speci- -- fies a default value. If group is set, its value is inserted; if -- not, is expanded and the result inserted. The second form -- specifies strings that are expanded and inserted when group is set -- or unset, respectively. The first form is just a convenient shorthand -+ As before, may be a group number or a name. The first form speci- -+ fies a default value. If group is set, its value is inserted; if -+ not, is expanded and the result inserted. The second form -+ specifies strings that are expanded and inserted when group is set -+ or unset, respectively. The first form is just a convenient shorthand - for - - ${:+${}:} - -- Backslash can be used to escape colons and closing curly brackets in -- the replacement strings. A change of the case forcing state within a -- replacement string remains in force afterwards, as shown in this -+ Backslash can be used to escape colons and closing curly brackets in -+ the replacement strings. A change of the case forcing state within a -+ replacement string remains in force afterwards, as shown in this - pcre2test example: - - /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo -@@ -6614,8 +6617,9 @@ BACKSLASH - - Resetting the match start - -- The escape sequence \K causes any previously matched characters not to -- be included in the final matched sequence. For example, the pattern: -+ In normal use, the escape sequence \K causes any previously matched -+ characters not to be included in the final matched sequence that is -+ returned. For example, the pattern: - - foo\Kbar - -@@ -6634,7 +6638,16 @@ BACKSLASH - defined". In PCRE2, \K is acted upon when it occurs inside positive - assertions, but is ignored in negative assertions. Note that when a - pattern such as (?=ab\K) matches, the reported start of the match can -- be greater than the end of the match. -+ be greater than the end of the match. Using \K in a lookbehind asser- -+ tion at the start of a pattern can also lead to odd effects. For exam- -+ ple, consider this pattern: -+ -+ (?<=\Kfoo)bar -+ -+ If the subject is "foobar", a call to pcre2_match() with a starting -+ offset of 3 succeeds and reports the matching string as "foobar", that -+ is, the start of the reported match is earlier than where the match -+ started. - - Simple assertions - -diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 -index 786b314..57b6d31 100644 ---- a/doc/pcre2api.3 -+++ b/doc/pcre2api.3 -@@ -3122,7 +3122,10 @@ string in \fIoutputbuffer\fP, replacing the part that was matched with the - \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can - be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in - which a \eK item in a lookahead in the pattern causes the match to end before --it starts are not supported, and give rise to an error return. -+it starts are not supported, and give rise to an error return. For global -+replacements, matches in which \eK in a lookbehind causes the match to start -+earlier than the point that was reached in the previous iteration are also not -+supported. - .P - The first seven arguments of \fBpcre2_substitute()\fP are the same as for - \fBpcre2_match()\fP, except that the partial matching options are not -diff --git a/src/pcre2.h.in b/src/pcre2.h.in -index a3a3fa6..0bc8cca 100644 ---- a/src/pcre2.h.in -+++ b/src/pcre2.h.in -@@ -5,7 +5,7 @@ - /* This is the public header file for the PCRE library, second API, to be - #included by applications that call PCRE2 functions. - -- Copyright (c) 2016-2017 University of Cambridge -+ Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -387,6 +387,7 @@ released, the numbers must not be changed. */ - #define PCRE2_ERROR_BADSERIALIZEDDATA (-62) - #define PCRE2_ERROR_HEAPLIMIT (-63) - #define PCRE2_ERROR_CONVERT_SYNTAX (-64) -+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65) - - - /* Request types for pcre2_pattern_info() */ -diff --git a/src/pcre2_error.c b/src/pcre2_error.c -index d98cae9..dce1efb 100644 ---- a/src/pcre2_error.c -+++ b/src/pcre2_error.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2017 University of Cambridge -+ New API code Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -260,6 +260,8 @@ static const unsigned char match_error_texts[] = - "bad serialized data\0" - "heap limit exceeded\0" - "invalid syntax\0" -+ /* 65 */ -+ "internal error - duplicate substitution match\0" - ; - - -diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c -index 8da951f..582a42d 100644 ---- a/src/pcre2_substitute.c -+++ b/src/pcre2_substitute.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016 University of Cambridge -+ New API code Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -238,10 +238,12 @@ PCRE2_SPTR repend; - PCRE2_SIZE extra_needed = 0; - PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; - PCRE2_SIZE *ovector; -+PCRE2_SIZE ovecsave[3]; - - buff_offset = 0; - lengthleft = buff_length = *blength; - *blength = PCRE2_UNSET; -+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; - - /* Partial matching is not valid. */ - -@@ -368,6 +370,26 @@ do - rc = PCRE2_ERROR_BADSUBSPATTERN; - goto EXIT; - } -+ -+ /* Check for the same match as previous. This is legitimate after matching an -+ empty string that starts after the initial match offset. We have tried again -+ at the match point in case the pattern is one like /(?<=\G.)/ which can never -+ match at its starting point, so running the match achieves the bumpalong. If -+ we do get the same (null) match at the original match point, it isn't such a -+ pattern, so we now do the empty string magic. In all other cases, a repeat -+ match should never occur. */ -+ -+ if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) -+ { -+ if (ovector[0] == ovector[1] && ovecsave[2] != start_offset) -+ { -+ goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; -+ ovecsave[2] = start_offset; -+ continue; /* Back to the top of the loop */ -+ } -+ rc = PCRE2_ERROR_INTERNAL_DUPMATCH; -+ goto EXIT; -+ } - - /* Count substitutions with a paranoid check for integer overflow; surely no - real call to this function would ever hit this! */ -@@ -799,13 +821,18 @@ do - } /* End handling a literal code unit */ - } /* End of loop for scanning the replacement. */ - -- /* The replacement has been copied to the output. Update the start offset to -- point to the rest of the subject string. If we matched an empty string, -- do the magic for global matches. */ -- -- start_offset = ovector[1]; -- goptions = (ovector[0] != ovector[1])? 0 : -+ /* The replacement has been copied to the output. Save the details of this -+ match. See above for how this data is used. If we matched an empty string, do -+ the magic for global matches. Finally, update the start offset to point to -+ the rest of the subject string. */ -+ -+ ovecsave[0] = ovector[0]; -+ ovecsave[1] = ovector[1]; -+ ovecsave[2] = start_offset; -+ -+ goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 : - PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; -+ start_offset = ovector[1]; - } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */ - - /* Copy the rest of the subject. */ -diff --git a/src/pcre2test.c b/src/pcre2test.c -index ad3db2c..d83aa43 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -6283,6 +6283,7 @@ size_t needlen; - void *use_dat_context; - BOOL utf; - BOOL subject_literal; -+PCRE2_SIZE ovecsave[3]; - - #ifdef SUPPORT_PCRE2_8 - uint8_t *q8 = NULL; -@@ -6929,6 +6930,9 @@ if (dat_datctl.replacement[0] != 0) - - if (timeitm) - fprintf(outfile, "** Timing is not supported with replace: ignored\n"); -+ -+ if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) -+ fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); - - xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 : - PCRE2_SUBSTITUTE_GLOBAL) | -@@ -7048,35 +7052,24 @@ if (dat_datctl.replacement[0] != 0) - } - - fprintf(outfile, "\n"); -+ show_memory = FALSE; -+ return PR_OK; - } /* End of substitution handling */ - - /* When a replacement string is not provided, run a loop for global matching --with one of the basic matching functions. */ -+with one of the basic matching functions. For altglobal (or first time round -+the loop), set an "unset" value for the previous match info. */ -+ -+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET; - --else for (gmatched = 0;; gmatched++) -+for (gmatched = 0;; gmatched++) - { - PCRE2_SIZE j; - int capcount; - PCRE2_SIZE *ovector; -- PCRE2_SIZE ovecsave[2]; - - ovector = FLD(match_data, ovector); - -- /* After the first time round a global loop, for a normal global (/g) -- iteration, save the current ovector[0,1] so that we can check that they do -- change each time. Otherwise a matching bug that returns the same string -- causes an infinite loop. It has happened! */ -- -- if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0) -- { -- ovecsave[0] = ovector[0]; -- ovecsave[1] = ovector[1]; -- } -- -- /* For altglobal (or first time round the loop), set an "unset" value. */ -- -- else ovecsave[0] = ovecsave[1] = PCRE2_UNSET; -- - /* Fill the ovector with junk to detect elements that do not get set - when they should be. */ - -@@ -7243,12 +7236,23 @@ else for (gmatched = 0;; gmatched++) - } - - /* If this is not the first time round a global loop, check that the -- returned string has changed. If not, there is a bug somewhere and we must -- break the loop because it will go on for ever. We know that there are -- always at least two elements in the ovector. */ -- -+ returned string has changed. If it has not, check for an empty string match -+ at different starting offset from the previous match. This is a failed test -+ retry for null-matching patterns that don't match at their starting offset, -+ for example /(?<=\G.)/. A repeated match at the same point is not such a -+ pattern, and must be discarded, and we then proceed to seek a non-null -+ match at the current point. For any other repeated match, there is a bug -+ somewhere and we must break the loop because it will go on for ever. We -+ know that there are always at least two elements in the ovector. */ -+ - if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1]) - { -+ if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset) -+ { -+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; -+ ovecsave[2] = dat_datctl.offset; -+ continue; /* Back to the top of the loop */ -+ } - fprintf(outfile, - "** PCRE2 error: global repeat returned the same string as previous\n"); - fprintf(outfile, "** Global loop abandoned\n"); -@@ -7556,6 +7560,7 @@ else for (gmatched = 0;; gmatched++) - - if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else - { -+ PCRE2_SIZE match_offset = FLD(match_data, ovector)[0]; - PCRE2_SIZE end_offset = FLD(match_data, ovector)[1]; - - /* We must now set up for the next iteration of a global search. If we have -@@ -7563,12 +7568,19 @@ else for (gmatched = 0;; gmatched++) - subject. If so, the loop is over. Otherwise, mimic what Perl's /g option - does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again - at the same point. If this fails it will be picked up above, where a fake -- match is set up so that at this point we advance to the next character. */ -- -- if (FLD(match_data, ovector)[0] == end_offset) -+ match is set up so that at this point we advance to the next character. -+ -+ However, in order to cope with patterns that never match at their starting -+ offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater -+ than the starting offset. This means there will be a retry with the -+ starting offset at the match offset. If this returns the same match again, -+ it is picked up above and ignored, and the special action is then taken. */ -+ -+ if (match_offset == end_offset) - { -- if (end_offset == ulen) break; /* End of subject */ -- g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; -+ if (end_offset == ulen) break; /* End of subject */ -+ if (match_offset <= dat_datctl.offset) -+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; - } - - /* However, even after matching a non-empty string, there is still one -@@ -7606,10 +7618,19 @@ else for (gmatched = 0;; gmatched++) - } - } - -- /* For /g (global), update the start offset, leaving the rest alone. */ -+ /* For a normal global (/g) iteration, save the current ovector[0,1] and -+ the starting offset so that we can check that they do change each time. -+ Otherwise a matching bug that returns the same string causes an infinite -+ loop. It has happened! Then update the start offset, leaving other -+ parameters alone. */ - - if ((dat_datctl.control & CTL_GLOBAL) != 0) -+ { -+ ovecsave[0] = ovector[0]; -+ ovecsave[1] = ovector[1]; -+ ovecsave[2] = dat_datctl.offset; - dat_datctl.offset = end_offset; -+ } - - /* For altglobal, just update the pointer and length. */ - -diff --git a/testdata/testinput1 b/testdata/testinput1 -index 9a9c5fd..fb50238 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -6189,4 +6189,7 @@ ef) x/x,mark - /(?=a+)a(a+)++b/ - aab - -+/(?<=\G.)/g,aftertext -+ abc -+ - # End of testinput1 -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 5d3a80e..797b0f7 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -4935,6 +4935,9 @@ a)"xI - //replace=0 - \=offset=7 - -+/(?<=\G.)/g,replace=+ -+ abc -+ - ".+\QX\E+"B,no_auto_possess - - ".+\QX\E+"B,auto_callout,no_auto_possess -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index 9c55be9..348dcbc 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -9822,4 +9822,13 @@ No match - 0: aab - 1: a - -+/(?<=\G.)/g,aftertext -+ abc -+ 0: -+ 0+ bc -+ 0: -+ 0+ c -+ 0: -+ 0+ -+ - # End of testinput1 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index fcaac8f..5c13f5b 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -15545,6 +15545,10 @@ Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement - \=offset=7 - Failed: error -33: bad offset value - -+/(?<=\G.)/g,replace=+ -+ abc -+ 3: a+b+c+ -+ - ".+\QX\E+"B,no_auto_possess - ------------------------------------------------------------------ - Bra -@@ -16576,7 +16580,7 @@ No match - ------------------------------------------------------------------ - - # End of testinput2 --Error -65: PCRE2_ERROR_BADDATA (unknown error number) -+Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data - Error -2: partial match - Error -1: no match --- -2.14.4 - diff --git a/pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch b/pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch deleted file mode 100644 index 6417ddf..0000000 --- a/pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 93c716bf538a172222aa60f77cff5ef46103c125 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 19 Feb 2018 16:55:47 +0000 -Subject: [PATCH 1/2] Fix pcre2test -C to correctly show what \R matches. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@916 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2test.c | 3 ++- - -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 7eca618..40e2161 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -7877,7 +7877,8 @@ else - (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval); - print_newline_config(optval, FALSE); - (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval); --printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" : -+printf(" \\R matches %s\n", -+ (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" : - "all Unicode newlines"); - (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval); - printf(" \\C is %ssupported\n", optval? "not ":""); --- -2.13.6 - diff --git a/pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch b/pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch deleted file mode 100644 index af1164e..0000000 --- a/pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 1890db00e66f40d659470a8a988d71daf59a29f9 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 19 Feb 2018 14:49:42 +0000 -Subject: [PATCH] Fix the value passed back for POSIX unset groups when - REG_STARTEND has a non-zero starting offset, and make pcre2test show relevant - POSIX unset groups. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@915 6239d852-aaf2-0410-a92c-79f79f948069 - -Petr Písař: Ported to 10.31. ---- - src/pcre2posix.c | 10 ++++++---- - src/pcre2test.c | 8 ++++++-- - testdata/testinput18 | 4 ++++ - testdata/testoutput18 | 15 +++++++++++++++ - -diff --git a/src/pcre2posix.c b/src/pcre2posix.c -index 026943e..5a2f7cd 100644 ---- a/src/pcre2posix.c -+++ b/src/pcre2posix.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016 University of Cambridge -+ New API code Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -93,7 +93,7 @@ information; I know nothing about MSVC myself). For example, something like - - void __cdecl function(....) - --might be needed. In order so make this easy, all the exported functions have -+might be needed. In order to make this easy, all the exported functions have - PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not - set, we ensure here that it has no effect. */ - -@@ -344,8 +344,10 @@ if (rc >= 0) - if ((size_t)rc > nmatch) rc = (int)nmatch; - for (i = 0; i < (size_t)rc; i++) - { -- pmatch[i].rm_so = ovector[i*2] + so; -- pmatch[i].rm_eo = ovector[i*2+1] + so; -+ pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 : -+ (int)(ovector[i*2] + so); -+ pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 : -+ (int)(ovector[i*2+1] + so); - } - for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; - return 0; -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 15bf404..7eca618 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam. - - Written by Philip Hazel - Original code Copyright (c) 1997-2012 University of Cambridge -- Rewritten code Copyright (c) 2016-2017 University of Cambridge -+ Rewritten code Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -6761,13 +6761,17 @@ if ((pat_patctl.control & CTL_POSIX) != 0) - fprintf(outfile, "Matched without capture\n"); - else - { -- size_t i; -+ size_t i, j; -+ size_t last_printed = (size_t)dat_datctl.oveccount; - for (i = 0; i < (size_t)dat_datctl.oveccount; i++) - { - if (pmatch[i].rm_so >= 0) - { - PCRE2_SIZE start = pmatch[i].rm_so; - PCRE2_SIZE end = pmatch[i].rm_eo; -+ for (j = last_printed + 1; j < i; j++) -+ fprintf(outfile, "%2d: \n", (int)j); -+ last_printed = i; - if (start > end) - { - start = pmatch[i].rm_eo; -diff --git a/testdata/testinput18 b/testdata/testinput18 -index 755a0c9..563a506 100644 ---- a/testdata/testinput18 -+++ b/testdata/testinput18 -@@ -134,4 +134,8 @@ - - /a\b(c/literal,posix,dotall - -+/((a)(b)?(c))/posix -+ 123ace -+ 123ace\=posix_startend=2:6 -+ - # End of testdata/testinput18 -diff --git a/testdata/testoutput18 b/testdata/testoutput18 -index d51423d..d6e3c71 100644 ---- a/testdata/testoutput18 -+++ b/testdata/testoutput18 -@@ -46,6 +46,7 @@ - defabc\=noteol - 0: def - 1: def -+ 2: - 3: def - - /the quick brown fox/ -@@ -206,4 +207,18 @@ No match: POSIX code 17: match failed - /a\b(c/literal,posix,dotall - Failed: POSIX code 16: bad argument at offset 0 - -+/((a)(b)?(c))/posix -+ 123ace -+ 0: ac -+ 1: ac -+ 2: a -+ 3: -+ 4: c -+ 123ace\=posix_startend=2:6 -+ 0: ac -+ 1: ac -+ 2: a -+ 3: -+ 4: c -+ - # End of testdata/testinput18 --- -2.13.6 - diff --git a/pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch b/pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch deleted file mode 100644 index 5903feb..0000000 --- a/pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch +++ /dev/null @@ -1,67 +0,0 @@ -From bba8a81a820ffcfc9c88adb0055e12bad66eee4e Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 16 Jul 2018 15:24:32 +0000 -Subject: [PATCH] Fixed atomic group backtracking bug. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@965 6239d852-aaf2-0410-a92c-79f79f948069 - -Petr Písař: Ported to 8.31. - -Signed-off-by: Petr Písař ---- - src/pcre2_match.c | 2 +- - testdata/testinput1 | 6 ++++++ - testdata/testoutput1 | 8 ++++++++ - 3 files changed, 15 insertions(+), 1 deletion(-) - -diff --git a/src/pcre2_match.c b/src/pcre2_match.c -index ce96016..c294049 100644 ---- a/src/pcre2_match.c -+++ b/src/pcre2_match.c -@@ -5509,7 +5509,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); - frame so that it points to the final branch. */ - - case OP_ONCE: -- Fback_frame = ((char *)F - (char *)P) + frame_size; -+ Fback_frame = ((char *)F - (char *)P); - for (;;) - { - uint32_t y = GET(P->ecode,1); -diff --git a/testdata/testinput1 b/testdata/testinput1 -index 1b3191c..cc11288 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -6202,4 +6202,10 @@ ef) x/x,mark - - /(?<=(?=.){4,5}x)/ - -+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize -+ abc -+ -+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize -+ abc -+ - # End of testinput1 -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index 06469fa..2fd2d48 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -9841,4 +9841,12 @@ No match - - /(?<=(?=.){4,5}x)/ - -+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize -+ abc -+ 0: abc -+ -+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize -+ abc -+ 0: abc -+ - # End of testinput1 --- -2.14.4 - diff --git a/pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch b/pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch deleted file mode 100644 index 800c246..0000000 --- a/pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 8ed58f1ae9e82a5c88c3960af38d5c96b191c554 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 2 Jul 2018 11:23:45 +0000 -Subject: [PATCH] Ignore qualifiers on lookaheads within lookbehinds when - checking for a fixed length. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@956 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2_compile.c | 26 ++++++++++++++++++++++++++ - testdata/testinput1 | 10 ++++++++++ - testdata/testinput2 | 2 ++ - testdata/testoutput1 | 10 ++++++++++ - testdata/testoutput2 | 14 ++++++++++++++ - 6 files changed, 66 insertions(+) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 0f75f36..5a47f1d 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -8572,6 +8572,32 @@ for (;; pptr++) - case META_LOOKAHEADNOT: - pptr = parsed_skip(pptr + 1, PSKIP_KET); - if (pptr == NULL) goto PARSED_SKIP_FAILED; -+ -+ /* Also ignore any qualifiers that follow a lookahead assertion. */ -+ -+ switch (pptr[1]) -+ { -+ case META_ASTERISK: -+ case META_ASTERISK_PLUS: -+ case META_ASTERISK_QUERY: -+ case META_PLUS: -+ case META_PLUS_PLUS: -+ case META_PLUS_QUERY: -+ case META_QUERY: -+ case META_QUERY_PLUS: -+ case META_QUERY_QUERY: -+ pptr++; -+ break; -+ -+ case META_MINMAX: -+ case META_MINMAX_PLUS: -+ case META_MINMAX_QUERY: -+ pptr += 3; -+ break; -+ -+ default: -+ break; -+ } - break; - - /* Lookbehinds can be ignored, but must themselves be checked. */ -diff --git a/testdata/testinput1 b/testdata/testinput1 -index fb50238..1b3191c 100644 ---- a/testdata/testinput1 -+++ b/testdata/testinput1 -@@ -6192,4 +6192,14 @@ ef) x/x,mark - /(?<=\G.)/g,aftertext - abc - -+/(?<=(?=.)?)/ -+ -+/(?<=(?=.)?+)/ -+ -+/(?<=(?=.)*)/ -+ -+/(?<=(?=.){4,5})/ -+ -+/(?<=(?=.){4,5}x)/ -+ - # End of testinput1 -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 7e703d5..cc4b59b 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -5435,4 +5435,6 @@ a)"xI - - /(?=a+)a(a+)++b/B - -+/(?<=(?=.){4,5}x)/B -+ - # End of testinput2 -diff --git a/testdata/testoutput1 b/testdata/testoutput1 -index 348dcbc..06469fa 100644 ---- a/testdata/testoutput1 -+++ b/testdata/testoutput1 -@@ -9831,4 +9831,14 @@ No match - 0: - 0+ - -+/(?<=(?=.)?)/ -+ -+/(?<=(?=.)?+)/ -+ -+/(?<=(?=.)*)/ -+ -+/(?<=(?=.){4,5})/ -+ -+/(?<=(?=.){4,5}x)/ -+ - # End of testinput1 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 9f504f6..aab0c94 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -16583,6 +16583,20 @@ No match - End - ------------------------------------------------------------------ - -+/(?<=(?=.){4,5}x)/B -+------------------------------------------------------------------ -+ Bra -+ AssertB -+ Reverse -+ Assert -+ Any -+ Ket -+ x -+ Ket -+ Ket -+ End -+------------------------------------------------------------------ -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.14.4 - diff --git a/pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch b/pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch deleted file mode 100644 index fc4bcc2..0000000 --- a/pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch +++ /dev/null @@ -1,174 +0,0 @@ -From 71340653cd375c01ada053c63d7c55d0ca475b47 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Fri, 3 Aug 2018 09:38:36 +0000 -Subject: [PATCH] Make /x more Perl-compatible by recognizing all of Unicode's - "Pattern White Space" characters, not just the ASCII ones. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@977 6239d852-aaf2-0410-a92c-79f79f948069 - -Petr Písař: Ported to 10.31. - -Signed-off-by: Petr Písař ---- - src/pcre2_compile.c | 25 +++++++++++++++++++------ - testdata/testinput4 | 15 +++++++++++++++ - testdata/testinput5 | 13 +++++++++++++ - testdata/testoutput4 | 18 ++++++++++++++++++ - testdata/testoutput5 | 16 ++++++++++++++++ - 5 files changed, 81 insertions(+), 6 deletions(-) - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 7ff8b4c..1d62a38 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -2434,11 +2434,17 @@ while (ptr < ptrend) - /* EITHER: not both options set */ - ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != - (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || -- /* OR: character > 255 */ -- c > 255 || -- /* OR: not a # comment or white space */ -- (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) -- )) -+#ifdef SUPPORT_UNICODE -+ /* OR: character > 255 AND not Unicode Pattern White Space */ -+ (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || -+#endif -+ /* OR: not a # comment or isspace() white space */ -+ (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 -+#ifdef SUPPORT_UNICODE -+ /* and not CHAR_NEL when Unicode is supported */ -+ && c != CHAR_NEL -+#endif -+ ))) - { - PCRE2_SIZE verbnamelength; - -@@ -2510,11 +2516,18 @@ while (ptr < ptrend) - - /* Skip over whitespace and # comments in extended mode. Note that c is a - character, not a code unit, so we must not use MAX_255 to test its size -- because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */ -+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The -+ whitespace characters are those designated as "Pattern White Space" by -+ Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is -+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a -+ subset of space characters that match \h and \v. */ - - if ((options & PCRE2_EXTENDED) != 0) - { - if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; -+#ifdef SUPPORT_UNICODE -+ if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; -+#endif - if (c == CHAR_NUMBER_SIGN) - { - while (ptr < ptrend) -diff --git a/testdata/testinput4 b/testdata/testinput4 -index 0ef7b8e..6884f60 100644 ---- a/testdata/testinput4 -+++ b/testdata/testinput4 -@@ -2300,5 +2300,20 @@ - \x{123}\x{122}\x{123} - \= Expect no match - \x{123}\x{124}\x{123} -+ -+# Test the full list of Unicode "Pattern White Space" characters that are to -+# be ignored by /x. The pattern lines below may show up oddly in text editors -+# or when listed to the screen. Note that characters such as U+2002, which are -+# matched as space by \h and \v are *not* "Pattern White Space". -+ -+/A…‎‏

B/x,utf -+ AB -+ -+/A B/x,utf -+ A\x{2002}B -+\= Expect no match -+ AB -+ -+# ------- - - # End of testinput4 -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 0366136..ebeee07 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2059,5 +2059,18 @@ - \x{1F1E6}\x{1F1E7}\x{1F1E7}B - \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B - -+# This tests the non-UTF Unicode NEL pattern whitespace character, only -+# recognized by PCRE2 with /x when there is Unicode support. -+ -+/A -+ ?B/x -+ AB -+ -+# This tests Unicode Pattern White Space characters in verb names when they -+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters -+# with code points greater than 255 between A, B, and C in the pattern. -+ -+/(*: A‎B
C)abc/x,utf,mark,alt_verbnames -+ abc - - # End of testinput5 -diff --git a/testdata/testoutput4 b/testdata/testoutput4 -index 6056e6d..51c8219 100644 ---- a/testdata/testoutput4 -+++ b/testdata/testoutput4 -@@ -3728,5 +3728,23 @@ No match - \= Expect no match - \x{123}\x{124}\x{123} - No match -+ -+# Test the full list of Unicode "Pattern White Space" characters that are to -+# be ignored by /x. The pattern lines below may show up oddly in text editors -+# or when listed to the screen. Note that characters such as U+2002, which are -+# matched as space by \h and \v are *not* "Pattern White Space". -+ -+/A…‎‏

B/x,utf -+ AB -+ 0: AB -+ -+/A B/x,utf -+ A\x{2002}B -+ 0: A\x{2002}B -+\= Expect no match -+ AB -+No match -+ -+# ------- - - # End of testinput4 -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index 4b3171c..1392e98 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -4700,5 +4700,21 @@ Callout 0: last capture = 1 - 1: \x{1f1e6}\x{1f1e7} - 2: \x{1f1e7}\x{1f1e6} - -+# This tests the non-UTF Unicode NEL pattern whitespace character, only -+# recognized by PCRE2 with /x when there is Unicode support. -+ -+/A -+ ?B/x -+ AB -+ 0: AB -+ -+# This tests Unicode Pattern White Space characters in verb names when they -+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters -+# with code points greater than 255 between A, B, and C in the pattern. -+ -+/(*: A‎B
C)abc/x,utf,mark,alt_verbnames -+ abc -+ 0: abc -+MK: ABC - - # End of testinput5 --- -2.14.4 - diff --git a/pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch b/pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch deleted file mode 100644 index b3887f3..0000000 --- a/pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch +++ /dev/null @@ -1,29 +0,0 @@ -From e3ac8929b8152e6a30eff90f791b76339e44d91b Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Mon, 19 Feb 2018 17:00:45 +0000 -Subject: [PATCH 2/2] Oops, forgot about "-C bsr" in previous patch. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@917 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2test.c | 2 +- - -diff --git a/src/pcre2test.c b/src/pcre2test.c -index 40e2161..ad3db2c 100644 ---- a/src/pcre2test.c -+++ b/src/pcre2test.c -@@ -7793,7 +7793,7 @@ if (arg != NULL && arg[0] != CHAR_MINUS) - { - case CONF_BSR: - (void)PCRE2_CONFIG(coptlist[i].value, &optval); -- printf("%s\n", optval? "ANYCRLF" : "ANY"); -+ printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY"); - break; - - case CONF_FIX: --- -2.13.6 - diff --git a/pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch b/pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch deleted file mode 100644 index 48394dc..0000000 --- a/pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch +++ /dev/null @@ -1,65 +0,0 @@ -From c82ef7bef66138a85362473df622d16bf728499e Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sun, 4 Mar 2018 15:13:37 +0000 -Subject: [PATCH] Set error offset zero for early errors in - pcre2_pattern_convert(). -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@926 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.31. ---- - src/pcre2_convert.c | 12 +++++++++--- - -diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c -index bdf9b86..1dd5c33 100644 ---- a/src/pcre2_convert.c -+++ b/src/pcre2_convert.c -@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Original API code Copyright (c) 1997-2012 University of Cambridge -- New API code Copyright (c) 2016-2017 University of Cambridge -+ New API code Copyright (c) 2016-2018 University of Cambridge - - ----------------------------------------------------------------------------- - Redistribution and use in source and binary forms, with or without -@@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0; - uint32_t pattype = options & TYPE_OPTIONS; - - if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL; -+ - if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */ - (pattype & (~pattype+1)) != pattype || /* More than one type set */ - pattype == 0) /* No type set */ - { -- *bufflenptr = 0; /* Error offset */ -+ *bufflenptr = 0; /* Error offset */ - return PCRE2_ERROR_BADOPTION; - } - -@@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext = - /* Check UTF if required. */ - - #ifndef SUPPORT_UNICODE --if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED; -+if (utf) -+ { -+ *bufflenptr = 0; /* Error offset */ -+ return PCRE2_ERROR_UNICODE_NOT_SUPPORTED; -+ } - #else - if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0) - { -@@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++) - break; - - default: -+ *bufflenptr = 0; /* Error offset */ - return PCRE2_ERROR_INTERNAL; - } - --- -2.14.3 - diff --git a/pcre2.spec b/pcre2.spec index d25e586..f18b84d 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -6,10 +6,10 @@ %bcond_with pcre2_enables_sealloc # This is stable release: -#%%global rcversion RC1 +%global rcversion RC1 Name: pcre2 -Version: 10.31 -Release: %{?rcversion:0.}9%{?rcversion:.%rcversion}%{?dist} +Version: 10.32 +Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -49,47 +49,6 @@ URL: http://www.pcre.org/ Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2 # Do no set RPATH if libdir is not /usr/lib Patch0: pcre2-10.10-Fix-multilib.patch -# Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero -# starting offset, upstream bug #2244, in upstream after 10.31 -Patch1: pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch -# 1/2 Fix pcre2test -C to correctly show what \R matches, -# in upstream after 10.31 -Patch2: pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch -# 2/2 Fix pcre2test -C to correctly show what \R matches, -# in upstream after 10.31 -Patch3: pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch -# Fix matching repeated character classes against an 8-bit string containting -# multi-code-unit characters, in upstream after 10.31 -Patch4: pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch -# Add support to pcre2grep for binary zeros in -f files, upstream bug #2222, -# in upstream after 10.31 -Patch5: pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch -# Fix compiler warnings in pcre2grep, in upstream after 10.31 -Patch6: pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch -# Fix setting error offset zero for early errors in pcre2_pattern_convert(), -# in upstream after 10.31 -Patch7: pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch -# Fix bug when \K is used in a lookbehind in a substitute pattern, -# in upstream after 10.31 -Patch8: pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch -# Fix global search/replace in pcre2test and pcre2_substitute() when the pattern -# matches an empty string, but never at the starting offset, -# in upstream after 10.31 -Patch9: pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch -# Fix checking that a lookbehind assertion has a fixed length if the -# lookbehind assertion is used inside a lookahead assertion, -# in upstream after 10.31 -Patch10: pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch -# Fix parsing VERSION conditions, in upstream after pcre-10.31 -Patch11: pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch -# Fix backtracking atomic groups when they are not separated by something with -# a backtracking point, in upstream after 10.31 -Patch12: pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch -# Recognize all Unicode space characters with /x option in a pattern, -# in upstream after 10.31 -Patch13: pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch -# Fix changing dynamic options, in upstream after 10.31 -Patch14: pcre2-10.31-Fix-dynamic-options-changing-bug.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -166,20 +125,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %prep %setup -q -n %{name}-%{myversion} %patch0 -p1 -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch8 -p1 -%patch9 -p1 -%patch10 -p1 -%patch11 -p1 -%patch12 -p1 -%patch13 -p1 -%patch14 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -282,6 +227,9 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Thu Aug 16 2018 Petr Pisar - 10.32-0.1.RC1 +- 10.32-RC1 bump + * Thu Aug 16 2018 Petr Pisar - 10.31-9 - Recognize all Unicode space characters with /x option in a pattern - Fix changing dynamic options diff --git a/sources b/sources index a0e24ca..d1bf8c8 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (pcre2-10.31.tar.bz2) = 44d7db2513d9415dcdf6541366fea585e016f572f3e4379f6e959a38114b2337851092049ab4a1576ae8f19b9de413edbcfa62f434c77fc8470747ee5413e967 +SHA512 (pcre2-10.32-RC1.tar.bz2) = 387d9060eef3553d254b48d510859f028eed0f6fbdc7b5067b7d84ec81ed9356972defdb97dce5f28e6188421336f77052700eb530caf8c6e245b079b8258558