10.32-RC1 bump

This commit is contained in:
Petr Písař 2018-08-16 11:58:51 +02:00
parent 0e29ad8a15
commit 7002d51cb1
17 changed files with 8 additions and 2417 deletions

1
.gitignore vendored
View File

@@ -11,3 +11,4 @@
 /pcre2-10.30.tar.bz2
 /pcre2-10.31-RC1.tar.bz2
 /pcre2-10.31.tar.bz2
+/pcre2-10.32-RC1.tar.bz2

View File

@@ -1,91 +0,0 @@
From ecf1a253d8b7c41f8700eb78e598bfddfeb97215 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sun, 25 Feb 2018 12:12:48 +0000
Subject: [PATCH] A small fix to pcre2grep to avoid compiler warnings for
-Wformat-overflow=2.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@921 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 78121ad..a9379cf 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -303,7 +303,7 @@ also for include/exclude patterns. */
typedef struct patstr {
struct patstr *next;
char *string;
- PCRE2_SIZE length;
+ PCRE2_SIZE length;
pcre2_code *compiled;
} patstr;
@@ -558,7 +558,7 @@ exit(rc);
Arguments:
s pattern string to add
- patlen length of pattern
+ patlen length of pattern
after if not NULL points to item to insert after
Returns: new pattern block or NULL on error
@@ -1285,7 +1285,7 @@ doing this for tty input means that no output appears until a lot of input has
been typed. Instead, tty input is handled line by line. We cannot use fgets()
for this, because it does not stop at a binary zero, and therefore there is no
way of telling how many characters it has read, because there may be binary
-zeros embedded in the data. This function is also used for reading patterns
+zeros embedded in the data. This function is also used for reading patterns
from files (the -f option).
Arguments:
@@ -3497,7 +3497,7 @@ else
while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
{
- while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
+ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
linenumber++;
if (patlen == 0) continue; /* Skip blank lines */
@@ -3669,8 +3669,15 @@ for (i = 1; i < argc; i++)
int arglen = (argequals == NULL || equals == NULL)?
(int)strlen(arg) : (int)(argequals - arg);
- sprintf(buff1, "%.*s", baselen, op->long_name);
- sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
+ if (snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name) >
+ (int)sizeof(buff1) ||
+ snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
+ fulllen - baselen - 2, opbra + 1) > (int)sizeof(buff2))
+ {
+ fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
+ op->long_name);
+ pcre2grep_exit(2);
+ }
if (strncmp(arg, buff1, arglen) == 0 ||
strncmp(arg, buff2, arglen) == 0)
@@ -3837,7 +3844,7 @@ for (i = 1; i < argc; i++)
else if (op->type == OP_PATLIST)
{
patdatastr *pd = (patdatastr *)op->dataptr;
- *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
+ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
*(pd->lastptr));
if (*(pd->lastptr) == NULL) goto EXIT2;
if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
@@ -4102,7 +4109,7 @@ if (patterns == NULL && pattern_files == NULL)
if (i >= argc) return usage(2);
patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
NULL);
- i++;
+ i++;
if (patterns == NULL) goto EXIT2;
}
--
2.13.6

View File

@@ -1,417 +0,0 @@
From d59c555dcc96b23d0481f901ba617db91b9b2a9a Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 24 Feb 2018 17:09:19 +0000
Subject: [PATCH] Add support to pcre2grep for binary zeros in -f files.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@920 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
RunGrepTest | 6 ++++
doc/pcre2grep.1 | 59 +++++++++++++++++++++++---------------
src/pcre2grep.c | 81 +++++++++++++++++++++++++++++------------------------
testdata/grepoutput | 3 ++
diff --git a/RunGrepTest b/RunGrepTest
index a26f677..293e5a5 100755
--- a/RunGrepTest
+++ b/RunGrepTest
@@ -641,6 +641,12 @@ echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
+echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
+printf "Next line pattern has binary zero\nABC\x00XYZ\n" >testtemp1grep
+printf "ABC\x00XYZ\nABCDEF\nDEFABC\n" >testtemp2grep
+$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
# Now compare the results.
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
index 5e5cbea..ba6aea6 100644
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "13 November 2017" "PCRE2 10.31"
+.TH PCRE2GREP 1 "24 February 2018" "PCRE2 10.32"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -121,6 +121,14 @@ a binary file is not applied. See the \fB--binary-files\fP option for a means
of changing the way binary files are handled.
.
.
+.SH "BINARY ZEROS IN PATTERNS"
+.rs
+.sp
+Patterns passed from the command line are strings that are terminated by a
+binary zero, so cannot contain internal zeros. However, patterns that are read
+from a file via the \fB-f\fP option may contain binary zeros.
+.
+.
.SH OPTIONS
.rs
.sp
@@ -304,12 +312,15 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or
.TP
\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
Read patterns from the file, one per line, and match them against each line of
-input. What constitutes a newline when reading the file is the operating
-system's default. The \fB--newline\fP option has no effect on this option.
-Trailing white space is removed from each line, and blank lines are ignored. An
-empty file contains no patterns and therefore matches nothing. See also the
-comments about multiple patterns versus a single pattern with alternatives in
-the description of \fB-e\fP above.
+input. As is the case with patterns on the command line, no delimiters should
+be used. What constitutes a newline when reading the file is the operating
+system's default interpretation of \en. The \fB--newline\fP option has no
+effect on this option. Trailing white space is removed from each line, and
+blank lines are ignored. An empty file contains no patterns and therefore
+matches nothing. Patterns read from a file in this way may contain binary
+zeros, which are treated as ordinary data characters. See also the comments
+about multiple patterns versus a single pattern with alternatives in the
+description of \fB-e\fP above.
.sp
If this option is given more than once, all the specified files are read. A
data line is output if any of the patterns match it. A file name can be given
@@ -320,14 +331,15 @@ command line; all arguments are treated as the names of paths to be searched.
.TP
\fB--file-list\fP=\fIfilename\fP
Read a list of files and/or directories that are to be scanned from the given
-file, one per line. Trailing white space is removed from each line, and blank
-lines are ignored. These paths are processed before any that are listed on the
-command line. The file name can be given as "-" to refer to the standard input.
-If \fB--file\fP and \fB--file-list\fP are both specified as "-", patterns are
-read first. This is useful only when the standard input is a terminal, from
-which further lines (the list of files) can be read after an end-of-file
-indication. If this option is given more than once, all the specified files are
-read.
+file, one per line. What constitutes a newline when reading the file is the
+operating system's default. Trailing white space is removed from each line, and
+blank lines are ignored. These paths are processed before any that are listed
+on the command line. The file name can be given as "-" to refer to the standard
+input. If \fB--file\fP and \fB--file-list\fP are both specified as "-",
+patterns are read first. This is useful only when the standard input is a
+terminal, from which further lines (the list of files) can be read after an
+end-of-file indication. If this option is given more than once, all the
+specified files are read.
.TP
\fB--file-offsets\fP
Instead of showing lines or parts of lines that match, show each match as an
@@ -679,12 +691,13 @@ The \fB-N\fP (\fB--newline\fP) option allows \fBpcre2grep\fP to scan files with
different newline conventions from the default. Any parts of the input files
that are written to the standard output are copied identically, with whatever
newline sequences they have in the input. However, the setting of this option
-does not affect the interpretation of files specified by the \fB-f\fP,
-\fB--exclude-from\fP, or \fB--include-from\fP options, which are assumed to use
-the operating system's standard newline sequence, nor does it affect the way in
-which \fBpcre2grep\fP writes informational messages to the standard error and
-output streams. For these it uses the string "\en" to indicate newlines,
-relying on the C I/O library to convert this to an appropriate sequence.
+affects only the way scanned files are processed. It does not affect the
+interpretation of files specified by the \fB-f\fP, \fB--file-list\fP,
+\fB--exclude-from\fP, or \fB--include-from\fP options, nor does it affect the
+way in which \fBpcre2grep\fP writes informational messages to the standard
+error and output streams. For these it uses the string "\en" to indicate
+newlines, relying on the C I/O library to convert this to an appropriate
+sequence.
.
.
.SH "OPTIONS COMPATIBILITY"
@@ -862,6 +875,6 @@ Cambridge, England.
.rs
.sp
.nf
-Last updated: 13 November 2017
-Copyright (c) 1997-2017 University of Cambridge.
+Last updated: 24 February 2018
+Copyright (c) 1997-2018 University of Cambridge.
.fi
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 02339f5..78121ad 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -13,7 +13,7 @@ distribution because other apparatus is needed to compile pcre2grep for z/OS.
The header can be found in the special z/OS distribution, which is available
from www.zaconsultants.net or from www.cbttape.org.
- Copyright (c) 1997-2017 University of Cambridge
+ Copyright (c) 1997-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -303,6 +303,7 @@ also for include/exclude patterns. */
typedef struct patstr {
struct patstr *next;
char *string;
+ PCRE2_SIZE length;
pcre2_code *compiled;
} patstr;
@@ -557,13 +558,14 @@ exit(rc);
Arguments:
s pattern string to add
+ patlen length of pattern
after if not NULL points to item to insert after
Returns: new pattern block or NULL on error
*/
static patstr *
-add_pattern(char *s, patstr *after)
+add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
{
patstr *p = (patstr *)malloc(sizeof(patstr));
if (p == NULL)
@@ -571,7 +573,7 @@ if (p == NULL)
fprintf(stderr, "pcre2grep: malloc failed\n");
pcre2grep_exit(2);
}
-if (strlen(s) > MAXPATLEN)
+if (patlen > MAXPATLEN)
{
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
MAXPATLEN);
@@ -580,6 +582,7 @@ if (strlen(s) > MAXPATLEN)
}
p->next = NULL;
p->string = s;
+p->length = patlen;
p->compiled = NULL;
if (after != NULL)
@@ -1276,12 +1279,14 @@ return om;
* Read one line of input *
*************************************************/
-/* Normally, input is read using fread() (or gzread, or BZ2_read) into a large
-buffer, so many lines may be read at once. However, doing this for tty input
-means that no output appears until a lot of input has been typed. Instead, tty
-input is handled line by line. We cannot use fgets() for this, because it does
-not stop at a binary zero, and therefore there is no way of telling how many
-characters it has read, because there may be binary zeros embedded in the data.
+/* Normally, input that is to be scanned is read using fread() (or gzread, or
+BZ2_read) into a large buffer, so many lines may be read at once. However,
+doing this for tty input means that no output appears until a lot of input has
+been typed. Instead, tty input is handled line by line. We cannot use fgets()
+for this, because it does not stop at a binary zero, and therefore there is no
+way of telling how many characters it has read, because there may be binary
+zeros embedded in the data. This function is also used for reading patterns
+from files (the -f option).
Arguments:
buffer the buffer to read into
@@ -1291,7 +1296,7 @@ Arguments:
Returns: the number of characters read, zero at end of file
*/
-static unsigned int
+static PCRE2_SIZE
read_one_line(char *buffer, int length, FILE *f)
{
int c;
@@ -1651,11 +1656,11 @@ Returns: TRUE if there was a match
*/
static BOOL
-match_patterns(char *matchptr, size_t length, unsigned int options,
- size_t startoffset, int *mrc)
+match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
+ PCRE2_SIZE startoffset, int *mrc)
{
int i;
-size_t slen = length;
+PCRE2_SIZE slen = length;
patstr *p = patterns;
const char *msg = "this text:\n\n";
@@ -2317,7 +2322,7 @@ unsigned long int count = 0;
char *lastmatchrestart = NULL;
char *ptr = main_buffer;
char *endptr;
-size_t bufflength;
+PCRE2_SIZE bufflength;
BOOL binary = FALSE;
BOOL endhyphenpending = FALSE;
BOOL input_line_buffered = line_buffered;
@@ -2339,7 +2344,7 @@ bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
input_line_buffered);
#ifdef SUPPORT_LIBBZ2
-if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
+if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */
#endif
endptr = main_buffer + bufflength;
@@ -2368,8 +2373,8 @@ while (ptr < endptr)
unsigned int options = 0;
BOOL match;
char *t = ptr;
- size_t length, linelength;
- size_t startoffset = 0;
+ PCRE2_SIZE length, linelength;
+ PCRE2_SIZE startoffset = 0;
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre2_match(). In multiline mode, it is the
@@ -2381,7 +2386,7 @@ while (ptr < endptr)
t = end_of_line(t, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = multiline? (size_t)(endptr - ptr) : linelength;
+ length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
/* Check to see if the line we are looking at extends right to the very end
of the buffer without a line terminator. This means the line is too long to
@@ -2560,7 +2565,7 @@ while (ptr < endptr)
{
if (!invert)
{
- size_t oldstartoffset;
+ PCRE2_SIZE oldstartoffset;
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%lu:", linenumber);
@@ -2647,7 +2652,7 @@ while (ptr < endptr)
startoffset -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = (size_t)(endptr - ptr);
+ length = (PCRE2_SIZE)(endptr - ptr);
}
goto ONLY_MATCHING_RESTART;
@@ -2812,7 +2817,7 @@ while (ptr < endptr)
endprevious -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
- length = (size_t)(endptr - ptr);
+ length = (PCRE2_SIZE)(endptr - ptr);
}
/* If startoffset is at the exact end of the line it means this
@@ -2895,7 +2900,7 @@ while (ptr < endptr)
/* If input is line buffered, and the buffer is not yet full, read another
line and add it into the buffer. */
- if (input_line_buffered && bufflength < (size_t)bufsize)
+ if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
{
int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
bufflength += add;
@@ -2907,7 +2912,7 @@ while (ptr < endptr)
1/3 and refill it. Before we do this, if some unprinted "after" lines are
about to be lost, print them. */
- if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
+ if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
{
if (after_context > 0 &&
lastmatchnumber > 0 &&
@@ -3395,9 +3400,8 @@ PCRE2_SIZE patlen, erroffset;
PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
if (p->compiled != NULL) return TRUE;
-
ps = p->string;
-patlen = strlen(ps);
+patlen = p->length;
if ((options & PCRE2_LITERAL) != 0)
{
@@ -3407,8 +3411,8 @@ if ((options & PCRE2_LITERAL) != 0)
if (ellength != 0)
{
- if (add_pattern(pe, p) == NULL) return FALSE;
- patlen = (int)(pe - ps - ellength);
+ patlen = pe - ps - ellength;
+ if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
}
}
@@ -3470,6 +3474,7 @@ static BOOL
read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
{
int linenumber = 0;
+PCRE2_SIZE patlen;
FILE *f;
const char *filename;
char buffer[MAXPATLEN+20];
@@ -3490,20 +3495,18 @@ else
filename = name;
}
-while (fgets(buffer, sizeof(buffer), f) != NULL)
+while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
{
- char *s = buffer + (int)strlen(buffer);
- while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
- *s = 0;
+ while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
linenumber++;
- if (buffer[0] == 0) continue; /* Skip blank lines */
+ if (patlen == 0) continue; /* Skip blank lines */
/* Note: this call to add_pattern() puts a pointer to the local variable
"buffer" into the pattern chain. However, that pointer is used only when
compiling the pattern, which happens immediately below, so we flatten it
afterwards, as a precaution against any later code trying to use it. */
- *patlastptr = add_pattern(buffer, *patlastptr);
+ *patlastptr = add_pattern(buffer, patlen, *patlastptr);
if (*patlastptr == NULL)
{
if (f != stdin) fclose(f);
@@ -3513,8 +3516,9 @@ while (fgets(buffer, sizeof(buffer), f) != NULL)
/* This loop is needed because compiling a "pattern" when -F is set may add
on additional literal patterns if the original contains a newline. In the
- common case, it never will, because fgets() stops at a newline. However,
- the -N option can be used to give pcre2grep a different newline setting. */
+ common case, it never will, because read_one_line() stops at a newline.
+ However, the -N option can be used to give pcre2grep a different newline
+ setting. */
for(;;)
{
@@ -3833,7 +3837,8 @@ for (i = 1; i < argc; i++)
else if (op->type == OP_PATLIST)
{
patdatastr *pd = (patdatastr *)op->dataptr;
- *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
+ *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
+ *(pd->lastptr));
if (*(pd->lastptr) == NULL) goto EXIT2;
if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
}
@@ -4095,7 +4100,9 @@ the first argument is the one and only pattern, and it must exist. */
if (patterns == NULL && pattern_files == NULL)
{
if (i >= argc) return usage(2);
- patterns = patterns_last = add_pattern(argv[i++], NULL);
+ patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
+ NULL);
+ i++;
if (patterns == NULL) goto EXIT2;
}
diff --git a/testdata/grepoutput b/testdata/grepoutput
index e49c2b2..9329248 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -945,3 +945,6 @@ RC=0
RC=0
abcd
RC=0
+---------------------------- Test 126 -----------------------------
+ABCXYZ
+RC=0
--
2.13.6

View File

@@ -1,120 +0,0 @@
From ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 19 Feb 2018 17:26:33 +0000
Subject: [PATCH] Fix \C bug with repeated character classes in UTF-8 mode.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2_match.c | 16 ++++++++++++----
testdata/testinput22 | 3 +++
testdata/testoutput22-16 | 4 ++++
testdata/testoutput22-32 | 4 ++++
testdata/testoutput22-8 | 4 ++++
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 79cc93f..ce96016 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for (;;)
{
RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
BACKCHAR(Feptr);
}
}
@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for(;;)
{
RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(Feptr);
#endif
@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
- Unicode character. Use <= pp to ensure backtracking doesn't go too far.
- */
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
for(;;)
{
diff --git a/testdata/testinput22 b/testdata/testinput22
index e6d4053..c218ea6 100644
--- a/testdata/testinput22
+++ b/testdata/testinput22
@@ -98,4 +98,7 @@
\= Expect no match - tests \C at end of subject
ab
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+
# End of testinput22
diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
index 88f827c..5e23611 100644
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@@ -171,4 +171,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
index ac485fc..8576f31 100644
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@@ -169,4 +169,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
index 3d31fbc..8543652 100644
--- a/testdata/testoutput22-8
+++ b/testdata/testoutput22-8
@@ -173,4 +173,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
--
2.13.6

View File

@@ -1,73 +0,0 @@
From c75868f77eb2ce2ff277355afcd966e3179e65a8 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 2 Jul 2018 12:26:04 +0000
Subject: [PATCH] Fix bug in VERSION number reading.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@957 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2_compile.c | 5 ++---
testdata/testinput2 | 3 +++
testdata/testoutput2 | 6 +++++-
4 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 5a47f1d..1208012 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3896,9 +3896,8 @@ while (ptr < ptrend)
if (*ptr == CHAR_DOT)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
- if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
- goto FAILED;
- if (minor < 10) minor *= 10;
+ minor = (*ptr++ - CHAR_0) * 10;
+ if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index cc4b59b..9b207ef 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4007,6 +4007,9 @@
/(?(VERSION>=10.0)yes|no)/I
yesno
+/(?(VERSION>=10.04)yes|no)/
+ yesno
+
/(?(VERSION=8)yes){3}/BI,aftertext
yesno
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index aab0c94..124a8b6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -13483,6 +13483,10 @@ Subject length lower bound = 2
yesno
0: yes
+/(?(VERSION>=10.04)yes|no)/
+ yesno
+ 0: yes
+
/(?(VERSION=8)yes){3}/BI,aftertext
------------------------------------------------------------------
Bra
@@ -13537,7 +13541,7 @@ Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION con
Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition
/(?(VERSION=10.101)yes|no)/
-Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition
+Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition
/abcd/I
Capturing subpattern count = 0
--
2.14.4

View File

@@ -1,179 +0,0 @@
From 0efedaf8864d1caa8ed0e7f8fb0b50d5231cacfa Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Fri, 22 Jun 2018 16:29:56 +0000
Subject: [PATCH] Fix bug when \K is used in a lookbehind in a substitute
pattern.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@948 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař <ppisar@redhat.com>: Ported to 10.31.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
doc/html/pcre2api.html | 14 ++++++++++++--
doc/pcre2.txt | 14 ++++++++++++--
doc/pcre2api.3 | 3 ++-
src/pcre2_error.c | 2 +-
src/pcre2_substitute.c | 6 +++---
testdata/testinput2 | 3 +++
testdata/testoutput2 | 6 +++++-
7 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index ba3b2ca..af904e6 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -2549,7 +2549,7 @@ calls to <b>pcre2_match()</b> if you are making repeated calls to find other
matches in the same subject string.
</P>
<P>
-WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
+<b>Warning:</b> When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid
string as a subject, or an invalid value of <i>startoffset</i>, is undefined.
Your program may crash or loop indefinitely.
<pre>
@@ -2756,6 +2756,15 @@ branch of the group, but it is not on the matching path. On the other hand,
when this pattern fails to match "bx", the returned name is B.
</P>
<P>
+<b>Warning:</b> By default, certain start-of-match optimizations are used to
+give a fast "no match" result in some situations. For example, if the anchoring
+is removed from the pattern above, there is an initial check for the presence
+of "c" in the subject before running the matching engine. This check fails for
+"bx", causing a match failure without seeing any marks. You can disable the
+start-of-match optimizations by setting the PCRE2_NO_START_OPTIMIZE option for
+<b>pcre2_compile()</b> or starting the pattern with (*NO_START_OPT).
+</P>
+<P>
After a successful match, a partial match, or one of the invalid UTF errors
(for example, PCRE2_ERROR_UTF8_ERR5), <b>pcre2_get_startchar()</b> can be
called. After a successful or partial match it returns the code unit offset of
@@ -3310,7 +3319,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
-it started, which can happen if \K is used in an assertion).
+it started or the match started earlier than the current position in the
+subject, which can happen if \K is used in an assertion).
</P>
<P>
As for all PCRE2 errors, a text message that describes the error can be
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 79d94e3..e5b941f 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -2498,7 +2498,7 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION
second and subsequent calls to pcre2_match() if you are making repeated
calls to find other matches in the same subject string.
- WARNING: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
+ Warning: When PCRE2_NO_UTF_CHECK is set, the effect of passing an
invalid string as a subject, or an invalid value of startoffset, is
undefined. Your program may crash or loop indefinitely.
@@ -2683,6 +2683,15 @@ OTHER INFORMATION ABOUT A MATCH
the other hand, when this pattern fails to match "bx", the returned
name is B.
+ Warning: By default, certain start-of-match optimizations are used to
+ give a fast "no match" result in some situations. For example, if the
+ anchoring is removed from the pattern above, there is an initial check
+ for the presence of "c" in the subject before running the matching
+ engine. This check fails for "bx", causing a match failure without see-
+ ing any marks. You can disable the start-of-match optimizations by set-
+ ting the PCRE2_NO_START_OPTIMIZE option for pcre2_compile() or starting
+ the pattern with (*NO_START_OPT).
+
After a successful match, a partial match, or one of the invalid UTF
errors (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
be called. After a successful or partial match it returns the code unit
@@ -3209,7 +3218,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP-
MISSINGBRACE (closing curly bracket not found), PCRE2_ERROR_BADSUBSTI-
TUTION (syntax error in extended group substitution), and
- PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started,
+ PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started
+ or the match started earlier than the current position in the subject,
which can happen if \K is used in an assertion).
As for all PCRE2 errors, a text message that describes the error can be
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index 786b314..ac6e246 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -3302,7 +3302,8 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
-it started, which can happen if \eK is used in an assertion).
+it started or the match started earlier than the current position in the
+subject, which can happen if \eK is used in an assertion).
.P
As for all PCRE2 errors, a text message that describes the error can be
obtained by calling the \fBpcre2_get_error_message()\fP function (see
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index d98cae9..a1f98d4 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -255,7 +255,7 @@ static const unsigned char match_error_texts[] =
"expected closing curly bracket in replacement string\0"
"bad substitution in replacement string\0"
/* 60 */
- "match with end before start is not supported\0"
+ "match with end before start or start moved backwards is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 8da951f..955370a 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -361,9 +361,9 @@ do
}
/* Handle a successful match. Matches that use \K to end before they start
- are not supported. */
-
- if (ovector[1] < ovector[0])
+ or start before the current point in the subject are not supported. */
+
+ if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5d3a80e..3499042 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4643,6 +4643,9 @@ B)x/alt_verbnames,mark
/(?=a\K)/replace=z
BaCaD
+
+/(?<=\K.)/g,replace=-
+ ab
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index fcaac8f..f9e128d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14899,7 +14899,11 @@ Subject length lower bound = 1
/(?=a\K)/replace=z
BaCaD
-Failed: error -60: match with end before start is not supported
+Failed: error -60: match with end before start or start moved backwards is not supported
+
+/(?<=\K.)/g,replace=-
+ ab
+Failed: error -60: match with end before start or start moved backwards is not supported
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
--
2.14.4

View File

@ -1,139 +0,0 @@
From 1247796cd3cffa4cfea368decfdbaf13b276bfe3 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 4 Aug 2018 08:20:18 +0000
Subject: [PATCH] Fix dynamic options changing bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@979 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 29 +++++++++++++++++------------
testdata/testinput1 | 5 +++++
testdata/testoutput1 | 8 ++++++++
3 files changed, 30 insertions(+), 12 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 1d62a38..9898d06 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2251,11 +2251,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
-/* Of the options that are changeable within the pattern, these are tracked
-during parsing. The rest are used from META_OPTIONS items when compiling. */
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
-#define PARSE_TRACKED_OPTIONS \
- (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+ PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+ PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@@ -2434,16 +2437,16 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
-#ifdef SUPPORT_UNICODE
+#ifdef SUPPORT_UNICODE
/* OR: character > 255 AND not Unicode Pattern White Space */
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
-#endif
+#endif
/* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL
-#endif
+#endif
)))
{
PCRE2_SIZE verbnamelength;
@@ -2518,16 +2521,16 @@ while (ptr < ptrend)
character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by
- Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
- U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+ Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
-#ifdef SUPPORT_UNICODE
+#ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
-#endif
+#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@@ -3534,6 +3537,8 @@ while (ptr < ptrend)
else
{
+ uint32_t oldoptions = options;
+
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
@@ -3604,7 +3609,7 @@ while (ptr < ptrend)
/* If nothing changed, no need to record. */
- if (set != 0 || unset != 0)
+ if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;
diff --git a/testdata/testinput1 b/testdata/testinput1
index cc11288..5b9c4df 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -2184,6 +2184,11 @@
Blah blah
blaH blah
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+\= Expect no match
+ blah aBLAH
+
/(?>a*)*/
a
aa
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 2fd2d48..f58076f 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -3346,6 +3346,14 @@ No match
0: blaH blah
1: blaH
+/((?i)blah)\s+(?m)A(?i:\1)/
+ blah ABLAH
+ 0: blah ABLAH
+ 1: blah
+\= Expect no match
+ blah aBLAH
+No match
+
/(?>a*)*/
a
0: a
--
2.14.4

View File

@ -1,708 +0,0 @@
From 7729d10594572b5e5a3ebfa89064cc176ba50c7e Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 2 Jul 2018 10:54:03 +0000
Subject: [PATCH] Fix global search/replace in pcre2test and pcre2_substitute()
when the pattern matches an empty string, but never at the starting offset.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@955 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
RunTest | 2 +-
doc/html/pcre2api.html | 5 +-
doc/html/pcre2pattern.html | 5 +-
doc/pcre2.txt | 175 ++++++++++++++++++++++++---------------------
doc/pcre2api.3 | 5 +-
src/pcre2.h.in | 3 +-
src/pcre2_error.c | 4 +-
src/pcre2_substitute.c | 41 +++++++++--
src/pcre2test.c | 77 ++++++++++++--------
testdata/testinput1 | 3 +
testdata/testinput2 | 3 +
testdata/testoutput1 | 9 +++
testdata/testoutput2 | 6 +-
13 files changed, 214 insertions(+), 124 deletions(-)
diff --git a/RunTest b/RunTest
index bc912da..f20f194 100755
--- a/RunTest
+++ b/RunTest
@@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
- $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,200 >>testtry
+ $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry
checkresult $? 2 "$opt"
fi
done
diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html
index ba3b2ca..daa32a9 100644
--- a/doc/html/pcre2api.html
+++ b/doc/html/pcre2api.html
@@ -3108,7 +3108,10 @@ string in <i>outputbuffer</i>, replacing the part that was matched with the
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \K item in a lookahead in the pattern causes the match to end before
-it starts are not supported, and give rise to an error return.
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \K in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
</P>
<P>
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html
index c495cba..bc07e8b 100644
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@@ -1082,8 +1082,9 @@ sequences but the characters that they represent.)
Resetting the match start
</b><br>
<P>
-The escape sequence \K causes any previously matched characters not to be
-included in the final matched sequence. For example, the pattern:
+In normal use, the escape sequence \K causes any previously matched characters
+not to be included in the final matched sequence that is returned. For example,
+the pattern:
<pre>
foo\Kbar
</pre>
diff --git a/doc/pcre2.txt b/doc/pcre2.txt
index 79d94e3..a82f857 100644
--- a/doc/pcre2.txt
+++ b/doc/pcre2.txt
@@ -3014,75 +3014,78 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
replacement string, whose length is supplied in rlength. This can be
given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \K item in a lookahead in the pattern causes the match to end
- before it starts are not supported, and give rise to an error return.
+ before it starts are not supported, and give rise to an error return.
+ For global replacements, matches in which \K in a lookbehind causes the
+ match to start earlier than the point that was reached in the previous
+ iteration are also not supported.
- The first seven arguments of pcre2_substitute() are the same as for
+ The first seven arguments of pcre2_substitute() are the same as for
pcre2_match(), except that the partial matching options are not permit-
- ted, and match_data may be passed as NULL, in which case a match data
- block is obtained and freed within this function, using memory manage-
- ment functions from the match context, if provided, or else those that
+ ted, and match_data may be passed as NULL, in which case a match data
+ block is obtained and freed within this function, using memory manage-
+ ment functions from the match context, if provided, or else those that
were used to allocate memory for the compiled code.
- The outlengthptr argument must point to a variable that contains the
- length, in code units, of the output buffer. If the function is suc-
- cessful, the value is updated to contain the length of the new string,
+ The outlengthptr argument must point to a variable that contains the
+ length, in code units, of the output buffer. If the function is suc-
+ cessful, the value is updated to contain the length of the new string,
excluding the trailing zero that is automatically added.
- If the function is not successful, the value set via outlengthptr
- depends on the type of error. For syntax errors in the replacement
- string, the value is the offset in the replacement string where the
- error was detected. For other errors, the value is PCRE2_UNSET by
- default. This includes the case of the output buffer being too small,
- unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which
- case the value is the minimum length needed, including space for the
- trailing zero. Note that in order to compute the required length,
- pcre2_substitute() has to simulate all the matching and copying,
+ If the function is not successful, the value set via outlengthptr
+ depends on the type of error. For syntax errors in the replacement
+ string, the value is the offset in the replacement string where the
+ error was detected. For other errors, the value is PCRE2_UNSET by
+ default. This includes the case of the output buffer being too small,
+ unless PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set (see below), in which
+ case the value is the minimum length needed, including space for the
+ trailing zero. Note that in order to compute the required length,
+ pcre2_substitute() has to simulate all the matching and copying,
instead of giving an error return as soon as the buffer overflows. Note
also that the length is in code units, not bytes.
- In the replacement string, which is interpreted as a UTF string in UTF
- mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
+ In the replacement string, which is interpreted as a UTF string in UTF
+ mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK
option is set, a dollar character is an escape character that can spec-
- ify the insertion of characters from capturing groups or (*MARK),
- (*PRUNE), or (*THEN) items in the pattern. The following forms are
+ ify the insertion of characters from capturing groups or (*MARK),
+ (*PRUNE), or (*THEN) items in the pattern. The following forms are
always recognized:
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
$*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
- Either a group number or a group name can be given for <n>. Curly
- brackets are required only if the following character would be inter-
+ Either a group number or a group name can be given for <n>. Curly
+ brackets are required only if the following character would be inter-
preted as part of the number or name. The number may be zero to include
- the entire matched string. For example, if the pattern a(b)c is
- matched with "=abc=" and the replacement string "+$1$0$1+", the result
+ the entire matched string. For example, if the pattern a(b)c is
+ matched with "=abc=" and the replacement string "+$1$0$1+", the result
is "=+babcb+=".
$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or
- (*THEN) on the matching path that has a name. (*MARK) must always
- include a name, but (*PRUNE) and (*THEN) need not. For example, in the
- case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
- (*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be
- used to perform simple simultaneous substitutions, as this pcre2test
+ (*THEN) on the matching path that has a name. (*MARK) must always
+ include a name, but (*PRUNE) and (*THEN) need not. For example, in the
+ case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
+ (*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be
+ used to perform simple simultaneous substitutions, as this pcre2test
example shows:
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
apple lemon
2: pear orange
- As well as the usual options for pcre2_match(), a number of additional
+ As well as the usual options for pcre2_match(), a number of additional
options can be set in the options argument of pcre2_substitute().
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
- string, replacing every matching substring. If this option is not set,
- only the first matching substring is replaced. The search for matches
- takes place in the original subject string (that is, previous replace-
- ments do not affect it). Iteration is implemented by advancing the
- startoffset value for each search, which is always passed the entire
+ string, replacing every matching substring. If this option is not set,
+ only the first matching substring is replaced. The search for matches
+ takes place in the original subject string (that is, previous replace-
+ ments do not affect it). Iteration is implemented by advancing the
+ startoffset value for each search, which is always passed the entire
subject string. If an offset limit is set in the match context, search-
ing stops when that limit is reached.
- You can restrict the effect of a global substitution to a portion of
+ You can restrict the effect of a global substitution to a portion of
the subject string by setting either or both of startoffset and an off-
set limit. Here is a pcre2test example:
@@ -3090,87 +3093,87 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
ABC ABC ABC ABC\=offset=3,offset_limit=12
2: ABC A!C A!C ABC
- When continuing with global substitutions after matching a substring
+ When continuing with global substitutions after matching a substring
with zero length, an attempt to find a non-empty match at the same off-
set is performed. If this is not successful, the offset is advanced by
one character except when CRLF is a valid newline sequence and the next
- two characters are CR, LF. In this case, the offset is advanced by two
+ two characters are CR, LF. In this case, the offset is advanced by two
characters.
- PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
+ PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
- ORY immediately. If this option is set, however, pcre2_substitute()
+ ORY immediately. If this option is set, however, pcre2_substitute()
continues to go through the motions of matching and substituting (with-
- out, of course, writing anything) in order to compute the size of buf-
- fer that is needed. This value is passed back via the outlengthptr
- variable, with the result of the function still being
+ out, of course, writing anything) in order to compute the size of buf-
+ fer that is needed. This value is passed back via the outlengthptr
+ variable, with the result of the function still being
PCRE2_ERROR_NOMEMORY.
- Passing a buffer size of zero is a permitted way of finding out how
- much memory is needed for given substitution. However, this does mean
+ Passing a buffer size of zero is a permitted way of finding out how
+ much memory is needed for given substitution. However, this does mean
that the entire operation is carried out twice. Depending on the appli-
- cation, it may be more efficient to allocate a large buffer and free
- the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
+ cation, it may be more efficient to allocate a large buffer and free
+ the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
FLOW_LENGTH.
- PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups
- that do not appear in the pattern to be treated as unset groups. This
- option should be used with care, because it means that a typo in a
- group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING
+ PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capturing groups
+ that do not appear in the pattern to be treated as unset groups. This
+ option should be used with care, because it means that a typo in a
+ group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING
error.
- PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including
+ PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capturing groups (including
unknown groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be
- treated as empty strings when inserted as described above. If this
- option is not set, an attempt to insert an unset group causes the
- PCRE2_ERROR_UNSET error. This option does not influence the extended
+ treated as empty strings when inserted as described above. If this
+ option is not set, an attempt to insert an unset group causes the
+ PCRE2_ERROR_UNSET error. This option does not influence the extended
substitution syntax described below.
- PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
- replacement string. Without this option, only the dollar character is
- special, and only the group insertion forms listed above are valid.
+ PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
+ replacement string. Without this option, only the dollar character is
+ special, and only the group insertion forms listed above are valid.
When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
- Firstly, backslash in a replacement string is interpreted as an escape
+ Firstly, backslash in a replacement string is interpreted as an escape
character. The usual forms such as \n or \x{ddd} can be used to specify
- particular character codes, and backslash followed by any non-alphanu-
- meric character quotes that character. Extended quoting can be coded
+ particular character codes, and backslash followed by any non-alphanu-
+ meric character quotes that character. Extended quoting can be coded
using \Q...\E, exactly as in pattern strings.
- There are also four escape sequences for forcing the case of inserted
- letters. The insertion mechanism has three states: no case forcing,
+ There are also four escape sequences for forcing the case of inserted
+ letters. The insertion mechanism has three states: no case forcing,
force upper case, and force lower case. The escape sequences change the
current state: \U and \L change to upper or lower case forcing, respec-
- tively, and \E (when not terminating a \Q quoted sequence) reverts to
- no case forcing. The sequences \u and \l force the next character (if
- it is a letter) to upper or lower case, respectively, and then the
+ tively, and \E (when not terminating a \Q quoted sequence) reverts to
+ no case forcing. The sequences \u and \l force the next character (if
+ it is a letter) to upper or lower case, respectively, and then the
state automatically reverts to no case forcing. Case forcing applies to
all inserted characters, including those from captured groups and let-
ters within \Q...\E quoted sequences.
Note that case forcing sequences such as \U...\E do not nest. For exam-
- ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
+ ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
\E has no effect.
- The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
- flexibility to group substitution. The syntax is similar to that used
+ The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
+ flexibility to group substitution. The syntax is similar to that used
by Bash:
${<n>:-<string>}
${<n>:+<string1>:<string2>}
- As before, <n> may be a group number or a name. The first form speci-
- fies a default value. If group <n> is set, its value is inserted; if
- not, <string> is expanded and the result inserted. The second form
- specifies strings that are expanded and inserted when group <n> is set
- or unset, respectively. The first form is just a convenient shorthand
+ As before, <n> may be a group number or a name. The first form speci-
+ fies a default value. If group <n> is set, its value is inserted; if
+ not, <string> is expanded and the result inserted. The second form
+ specifies strings that are expanded and inserted when group <n> is set
+ or unset, respectively. The first form is just a convenient shorthand
for
${<n>:+${<n>}:<string>}
- Backslash can be used to escape colons and closing curly brackets in
- the replacement strings. A change of the case forcing state within a
- replacement string remains in force afterwards, as shown in this
+ Backslash can be used to escape colons and closing curly brackets in
+ the replacement strings. A change of the case forcing state within a
+ replacement string remains in force afterwards, as shown in this
pcre2test example:
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
@@ -6614,8 +6617,9 @@ BACKSLASH
Resetting the match start
- The escape sequence \K causes any previously matched characters not to
- be included in the final matched sequence. For example, the pattern:
+ In normal use, the escape sequence \K causes any previously matched
+ characters not to be included in the final matched sequence that is
+ returned. For example, the pattern:
foo\Kbar
@@ -6634,7 +6638,16 @@ BACKSLASH
defined". In PCRE2, \K is acted upon when it occurs inside positive
assertions, but is ignored in negative assertions. Note that when a
pattern such as (?=ab\K) matches, the reported start of the match can
- be greater than the end of the match.
+ be greater than the end of the match. Using \K in a lookbehind asser-
+ tion at the start of a pattern can also lead to odd effects. For exam-
+ ple, consider this pattern:
+
+ (?<=\Kfoo)bar
+
+ If the subject is "foobar", a call to pcre2_match() with a starting
+ offset of 3 succeeds and reports the matching string as "foobar", that
+ is, the start of the reported match is earlier than where the match
+ started.
Simple assertions
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3
index 786b314..57b6d31 100644
--- a/doc/pcre2api.3
+++ b/doc/pcre2api.3
@@ -3122,7 +3122,10 @@ string in \fIoutputbuffer\fP, replacing the part that was matched with the
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
which a \eK item in a lookahead in the pattern causes the match to end before
-it starts are not supported, and give rise to an error return.
+it starts are not supported, and give rise to an error return. For global
+replacements, matches in which \eK in a lookbehind causes the match to start
+earlier than the point that was reached in the previous iteration are also not
+supported.
.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index a3a3fa6..0bc8cca 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
- Copyright (c) 2016-2017 University of Cambridge
+ Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -387,6 +387,7 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
#define PCRE2_ERROR_CONVERT_SYNTAX (-64)
+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
/* Request types for pcre2_pattern_info() */
diff --git a/src/pcre2_error.c b/src/pcre2_error.c
index d98cae9..dce1efb 100644
--- a/src/pcre2_error.c
+++ b/src/pcre2_error.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -260,6 +260,8 @@ static const unsigned char match_error_texts[] =
"bad serialized data\0"
"heap limit exceeded\0"
"invalid syntax\0"
+ /* 65 */
+ "internal error - duplicate substitution match\0"
;
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 8da951f..582a42d 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -238,10 +238,12 @@ PCRE2_SPTR repend;
PCRE2_SIZE extra_needed = 0;
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
PCRE2_SIZE *ovector;
+PCRE2_SIZE ovecsave[3];
buff_offset = 0;
lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
/* Partial matching is not valid. */
@@ -368,6 +370,26 @@ do
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
+
+ /* Check for the same match as previous. This is legitimate after matching an
+ empty string that starts after the initial match offset. We have tried again
+ at the match point in case the pattern is one like /(?<=\G.)/ which can never
+ match at its starting point, so running the match achieves the bumpalong. If
+ we do get the same (null) match at the original match point, it isn't such a
+ pattern, so we now do the empty string magic. In all other cases, a repeat
+ match should never occur. */
+
+ if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
+ {
+ if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
+ {
+ goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ ovecsave[2] = start_offset;
+ continue; /* Back to the top of the loop */
+ }
+ rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
+ goto EXIT;
+ }
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -799,13 +821,18 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
- /* The replacement has been copied to the output. Update the start offset to
- point to the rest of the subject string. If we matched an empty string,
- do the magic for global matches. */
-
- start_offset = ovector[1];
- goptions = (ovector[0] != ovector[1])? 0 :
+ /* The replacement has been copied to the output. Save the details of this
+ match. See above for how this data is used. If we matched an empty string, do
+ the magic for global matches. Finally, update the start offset to point to
+ the rest of the subject string. */
+
+ ovecsave[0] = ovector[0];
+ ovecsave[1] = ovector[1];
+ ovecsave[2] = start_offset;
+
+ goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
+ start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */
diff --git a/src/pcre2test.c b/src/pcre2test.c
index ad3db2c..d83aa43 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -6283,6 +6283,7 @@ size_t needlen;
void *use_dat_context;
BOOL utf;
BOOL subject_literal;
+PCRE2_SIZE ovecsave[3];
#ifdef SUPPORT_PCRE2_8
uint8_t *q8 = NULL;
@@ -6929,6 +6930,9 @@ if (dat_datctl.replacement[0] != 0)
if (timeitm)
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
+
+ if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
+ fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL) |
@@ -7048,35 +7052,24 @@ if (dat_datctl.replacement[0] != 0)
}
fprintf(outfile, "\n");
+ show_memory = FALSE;
+ return PR_OK;
} /* End of substitution handling */
/* When a replacement string is not provided, run a loop for global matching
-with one of the basic matching functions. */
+with one of the basic matching functions. For altglobal (or first time round
+the loop), set an "unset" value for the previous match info. */
+
+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
-else for (gmatched = 0;; gmatched++)
+for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
- PCRE2_SIZE ovecsave[2];
ovector = FLD(match_data, ovector);
- /* After the first time round a global loop, for a normal global (/g)
- iteration, save the current ovector[0,1] so that we can check that they do
- change each time. Otherwise a matching bug that returns the same string
- causes an infinite loop. It has happened! */
-
- if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
- {
- ovecsave[0] = ovector[0];
- ovecsave[1] = ovector[1];
- }
-
- /* For altglobal (or first time round the loop), set an "unset" value. */
-
- else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
-
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
@@ -7243,12 +7236,23 @@ else for (gmatched = 0;; gmatched++)
}
/* If this is not the first time round a global loop, check that the
- returned string has changed. If not, there is a bug somewhere and we must
- break the loop because it will go on for ever. We know that there are
- always at least two elements in the ovector. */
-
+ returned string has changed. If it has not, check for an empty string match
+ at a different starting offset from the previous match. This is a failed test
+ retry for null-matching patterns that don't match at their starting offset,
+ for example /(?<=\G.)/. A repeated match at the same point is not such a
+ pattern, and must be discarded, and we then proceed to seek a non-null
+ match at the current point. For any other repeated match, there is a bug
+ somewhere and we must break the loop because it will go on for ever. We
+ know that there are always at least two elements in the ovector. */
+
if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{
+ if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
+ {
+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ ovecsave[2] = dat_datctl.offset;
+ continue; /* Back to the top of the loop */
+ }
fprintf(outfile,
"** PCRE2 error: global repeat returned the same string as previous\n");
fprintf(outfile, "** Global loop abandoned\n");
@@ -7556,6 +7560,7 @@ else for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
{
+ PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
/* We must now set up for the next iteration of a global search. If we have
@@ -7563,12 +7568,19 @@ else for (gmatched = 0;; gmatched++)
subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
at the same point. If this fails it will be picked up above, where a fake
- match is set up so that at this point we advance to the next character. */
-
- if (FLD(match_data, ovector)[0] == end_offset)
+ match is set up so that at this point we advance to the next character.
+
+ However, in order to cope with patterns that never match at their starting
+ offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
+ than the starting offset. This means there will be a retry with the
+ starting offset at the match offset. If this returns the same match again,
+ it is picked up above and ignored, and the special action is then taken. */
+
+ if (match_offset == end_offset)
{
- if (end_offset == ulen) break; /* End of subject */
- g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+ if (end_offset == ulen) break; /* End of subject */
+ if (match_offset <= dat_datctl.offset)
+ g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
/* However, even after matching a non-empty string, there is still one
@@ -7606,10 +7618,19 @@ else for (gmatched = 0;; gmatched++)
}
}
- /* For /g (global), update the start offset, leaving the rest alone. */
+ /* For a normal global (/g) iteration, save the current ovector[0,1] and
+ the starting offset so that we can check that they do change each time.
+ Otherwise a matching bug that returns the same string causes an infinite
+ loop. It has happened! Then update the start offset, leaving other
+ parameters alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
+ {
+ ovecsave[0] = ovector[0];
+ ovecsave[1] = ovector[1];
+ ovecsave[2] = dat_datctl.offset;
dat_datctl.offset = end_offset;
+ }
/* For altglobal, just update the pointer and length. */
diff --git a/testdata/testinput1 b/testdata/testinput1
index 9a9c5fd..fb50238 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6189,4 +6189,7 @@ ef) x/x,mark
/(?=a+)a(a+)++b/
aab
+/(?<=\G.)/g,aftertext
+ abc
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5d3a80e..797b0f7 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4935,6 +4935,9 @@ a)"xI
//replace=0
\=offset=7
+/(?<=\G.)/g,replace=+
+ abc
+
".+\QX\E+"B,no_auto_possess
".+\QX\E+"B,auto_callout,no_auto_possess
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 9c55be9..348dcbc 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9822,4 +9822,13 @@ No match
0: aab
1: a
+/(?<=\G.)/g,aftertext
+ abc
+ 0:
+ 0+ bc
+ 0:
+ 0+ c
+ 0:
+ 0+
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index fcaac8f..5c13f5b 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -15545,6 +15545,10 @@ Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement
\=offset=7
Failed: error -33: bad offset value
+/(?<=\G.)/g,replace=+
+ abc
+ 3: a+b+c+
+
".+\QX\E+"B,no_auto_possess
------------------------------------------------------------------
Bra
@@ -16576,7 +16580,7 @@ No match
------------------------------------------------------------------
# End of testinput2
-Error -65: PCRE2_ERROR_BADDATA (unknown error number)
+Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Error -2: partial match
Error -1: no match
--
2.14.4

View File

@ -1,30 +0,0 @@
From 93c716bf538a172222aa60f77cff5ef46103c125 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 19 Feb 2018 16:55:47 +0000
Subject: [PATCH 1/2] Fix pcre2test -C to correctly show what \R matches.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@916 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2test.c | 3 ++-
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 7eca618..40e2161 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -7877,7 +7877,8 @@ else
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
print_newline_config(optval, FALSE);
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
-printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
+printf(" \\R matches %s\n",
+ (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
"all Unicode newlines");
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
printf(" \\C is %ssupported\n", optval? "not ":"");
--
2.13.6

View File

@ -1,133 +0,0 @@
From 1890db00e66f40d659470a8a988d71daf59a29f9 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 19 Feb 2018 14:49:42 +0000
Subject: [PATCH] Fix the value passed back for POSIX unset groups when
REG_STARTEND has a non-zero starting offset, and make pcre2test show relevant
POSIX unset groups.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@915 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2posix.c | 10 ++++++----
src/pcre2test.c | 8 ++++++--
testdata/testinput18 | 4 ++++
testdata/testoutput18 | 15 +++++++++++++++
diff --git a/src/pcre2posix.c b/src/pcre2posix.c
index 026943e..5a2f7cd 100644
--- a/src/pcre2posix.c
+++ b/src/pcre2posix.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -93,7 +93,7 @@ information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
-might be needed. In order so make this easy, all the exported functions have
+might be needed. In order to make this easy, all the exported functions have
PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
@@ -344,8 +344,10 @@ if (rc >= 0)
if ((size_t)rc > nmatch) rc = (int)nmatch;
for (i = 0; i < (size_t)rc; i++)
{
- pmatch[i].rm_so = ovector[i*2] + so;
- pmatch[i].rm_eo = ovector[i*2+1] + so;
+ pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
+ (int)(ovector[i*2] + so);
+ pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
+ (int)(ovector[i*2+1] + so);
}
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
return 0;
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 15bf404..7eca618 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam.
Written by Philip Hazel
Original code Copyright (c) 1997-2012 University of Cambridge
- Rewritten code Copyright (c) 2016-2017 University of Cambridge
+ Rewritten code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -6761,13 +6761,17 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
fprintf(outfile, "Matched without capture\n");
else
{
- size_t i;
+ size_t i, j;
+ size_t last_printed = (size_t)dat_datctl.oveccount;
for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
{
if (pmatch[i].rm_so >= 0)
{
PCRE2_SIZE start = pmatch[i].rm_so;
PCRE2_SIZE end = pmatch[i].rm_eo;
+ for (j = last_printed + 1; j < i; j++)
+ fprintf(outfile, "%2d: <unset>\n", (int)j);
+ last_printed = i;
if (start > end)
{
start = pmatch[i].rm_eo;
diff --git a/testdata/testinput18 b/testdata/testinput18
index 755a0c9..563a506 100644
--- a/testdata/testinput18
+++ b/testdata/testinput18
@@ -134,4 +134,8 @@
/a\b(c/literal,posix,dotall
+/((a)(b)?(c))/posix
+ 123ace
+ 123ace\=posix_startend=2:6
+
# End of testdata/testinput18
diff --git a/testdata/testoutput18 b/testdata/testoutput18
index d51423d..d6e3c71 100644
--- a/testdata/testoutput18
+++ b/testdata/testoutput18
@@ -46,6 +46,7 @@
defabc\=noteol
0: def
1: def
+ 2: <unset>
3: def
/the quick brown fox/
@@ -206,4 +207,18 @@ No match: POSIX code 17: match failed
/a\b(c/literal,posix,dotall
Failed: POSIX code 16: bad argument at offset 0
+/((a)(b)?(c))/posix
+ 123ace
+ 0: ac
+ 1: ac
+ 2: a
+ 3: <unset>
+ 4: c
+ 123ace\=posix_startend=2:6
+ 0: ac
+ 1: ac
+ 2: a
+ 3: <unset>
+ 4: c
+
# End of testdata/testinput18
--
2.13.6

View File

@ -1,67 +0,0 @@
From bba8a81a820ffcfc9c88adb0055e12bad66eee4e Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 16 Jul 2018 15:24:32 +0000
Subject: [PATCH] Fixed atomic group backtracking bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@965 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_match.c | 2 +-
testdata/testinput1 | 6 ++++++
testdata/testoutput1 | 8 ++++++++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ce96016..c294049 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -5509,7 +5509,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
frame so that it points to the final branch. */
case OP_ONCE:
- Fback_frame = ((char *)F - (char *)P) + frame_size;
+ Fback_frame = ((char *)F - (char *)P);
for (;;)
{
uint32_t y = GET(P->ecode,1);
diff --git a/testdata/testinput1 b/testdata/testinput1
index 1b3191c..cc11288 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6202,4 +6202,10 @@ ef) x/x,mark
/(?<=(?=.){4,5}x)/
+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize
+ abc
+
+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize
+ abc
+
# End of testinput1
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 06469fa..2fd2d48 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9841,4 +9841,12 @@ No match
/(?<=(?=.){4,5}x)/
+/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize
+ abc
+ 0: abc
+
+/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize
+ abc
+ 0: abc
+
# End of testinput1
--
2.14.4

View File

@ -1,133 +0,0 @@
From 8ed58f1ae9e82a5c88c3960af38d5c96b191c554 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 2 Jul 2018 11:23:45 +0000
Subject: [PATCH] Ignore qualifiers on lookaheads within lookbehinds when
checking for a fixed length.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@956 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2_compile.c | 26 ++++++++++++++++++++++++++
testdata/testinput1 | 10 ++++++++++
testdata/testinput2 | 2 ++
testdata/testoutput1 | 10 ++++++++++
testdata/testoutput2 | 14 ++++++++++++++
6 files changed, 66 insertions(+)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 0f75f36..5a47f1d 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -8572,6 +8572,32 @@ for (;; pptr++)
case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED;
+
+ /* Also ignore any qualifiers that follow a lookahead assertion. */
+
+ switch (pptr[1])
+ {
+ case META_ASTERISK:
+ case META_ASTERISK_PLUS:
+ case META_ASTERISK_QUERY:
+ case META_PLUS:
+ case META_PLUS_PLUS:
+ case META_PLUS_QUERY:
+ case META_QUERY:
+ case META_QUERY_PLUS:
+ case META_QUERY_QUERY:
+ pptr++;
+ break;
+
+ case META_MINMAX:
+ case META_MINMAX_PLUS:
+ case META_MINMAX_QUERY:
+ pptr += 3;
+ break;
+
+ default:
+ break;
+ }
break;
/* Lookbehinds can be ignored, but must themselves be checked. */
diff --git a/testdata/testinput1 b/testdata/testinput1
index fb50238..1b3191c 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -6192,4 +6192,14 @@ ef) x/x,mark
/(?<=\G.)/g,aftertext
abc
+/(?<=(?=.)?)/
+
+/(?<=(?=.)?+)/
+
+/(?<=(?=.)*)/
+
+/(?<=(?=.){4,5})/
+
+/(?<=(?=.){4,5}x)/
+
# End of testinput1
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7e703d5..cc4b59b 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5435,4 +5435,6 @@ a)"xI
/(?=a+)a(a+)++b/B
+/(?<=(?=.){4,5}x)/B
+
# End of testinput2
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 348dcbc..06469fa 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9831,4 +9831,14 @@ No match
0:
0+
+/(?<=(?=.)?)/
+
+/(?<=(?=.)?+)/
+
+/(?<=(?=.)*)/
+
+/(?<=(?=.){4,5})/
+
+/(?<=(?=.){4,5}x)/
+
# End of testinput1
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 9f504f6..aab0c94 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16583,6 +16583,20 @@ No match
End
------------------------------------------------------------------
+/(?<=(?=.){4,5}x)/B
+------------------------------------------------------------------
+ Bra
+ AssertB
+ Reverse
+ Assert
+ Any
+ Ket
+ x
+ Ket
+ Ket
+ End
+------------------------------------------------------------------
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.14.4

View File

@ -1,174 +0,0 @@
From 71340653cd375c01ada053c63d7c55d0ca475b47 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Fri, 3 Aug 2018 09:38:36 +0000
Subject: [PATCH] Make /x more Perl-compatible by recognizing all of Unicode's
"Pattern White Space" characters, not just the ASCII ones.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@977 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
src/pcre2_compile.c | 25 +++++++++++++++++++------
testdata/testinput4 | 15 +++++++++++++++
testdata/testinput5 | 13 +++++++++++++
testdata/testoutput4 | 18 ++++++++++++++++++
testdata/testoutput5 | 16 ++++++++++++++++
5 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7ff8b4c..1d62a38 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2434,11 +2434,17 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
- /* OR: character > 255 */
- c > 255 ||
- /* OR: not a # comment or white space */
- (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
- ))
+#ifdef SUPPORT_UNICODE
+ /* OR: character > 255 AND not Unicode Pattern White Space */
+ (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
+#endif
+ /* OR: not a # comment or isspace() white space */
+ (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
+#ifdef SUPPORT_UNICODE
+ /* and not CHAR_NEL when Unicode is supported */
+ && c != CHAR_NEL
+#endif
+ )))
{
PCRE2_SIZE verbnamelength;
@@ -2510,11 +2516,18 @@ while (ptr < ptrend)
/* Skip over whitespace and # comments in extended mode. Note that c is a
character, not a code unit, so we must not use MAX_255 to test its size
- because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
+ because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
+ whitespace characters are those designated as "Pattern White Space" by
+ Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
+ U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+ subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
+#ifdef SUPPORT_UNICODE
+ if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
+#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
diff --git a/testdata/testinput4 b/testdata/testinput4
index 0ef7b8e..6884f60 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2300,5 +2300,20 @@
\x{123}\x{122}\x{123}
\= Expect no match
\x{123}\x{124}\x{123}
+
+# Test the full list of Unicode "Pattern White Space" characters that are to
+# be ignored by /x. The pattern lines below may show up oddly in text editors
+# or when listed to the screen. Note that characters such as U+2002, which are
+# matched as space by \h and \v are *not* "Pattern White Space".
+
+/A…B/x,utf
+ AB
+
+/AB/x,utf
+ A\x{2002}B
+\= Expect no match
+ AB
+
+# -------
# End of testinput4
diff --git a/testdata/testinput5 b/testdata/testinput5
index 0366136..ebeee07 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2059,5 +2059,18 @@
\x{1F1E6}\x{1F1E7}\x{1F1E7}B
\x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
+# This tests the non-UTF Unicode NEL pattern whitespace character, only
+# recognized by PCRE2 with /x when there is Unicode support.
+
+/A
+ ?B/x
+ AB
+
+# This tests Unicode Pattern White Space characters in verb names when they
+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
+# with code points greater than 255 between A, B, and C in the pattern.
+
+/(*: ABC)abc/x,utf,mark,alt_verbnames
+ abc
# End of testinput5
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 6056e6d..51c8219 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -3728,5 +3728,23 @@ No match
\= Expect no match
\x{123}\x{124}\x{123}
No match
+
+# Test the full list of Unicode "Pattern White Space" characters that are to
+# be ignored by /x. The pattern lines below may show up oddly in text editors
+# or when listed to the screen. Note that characters such as U+2002, which are
+# matched as space by \h and \v are *not* "Pattern White Space".
+
+/A…B/x,utf
+ AB
+ 0: AB
+
+/AB/x,utf
+ A\x{2002}B
+ 0: A\x{2002}B
+\= Expect no match
+ AB
+No match
+
+# -------
# End of testinput4
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 4b3171c..1392e98 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -4700,5 +4700,21 @@ Callout 0: last capture = 1
1: \x{1f1e6}\x{1f1e7}
2: \x{1f1e7}\x{1f1e6}
+# This tests the non-UTF Unicode NEL pattern whitespace character, only
+# recognized by PCRE2 with /x when there is Unicode support.
+
+/A
+ ?B/x
+ AB
+ 0: AB
+
+# This tests Unicode Pattern White Space characters in verb names when they
+# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
+# with code points greater than 255 between A, B, and C in the pattern.
+
+/(*: ABC)abc/x,utf,mark,alt_verbnames
+ abc
+ 0: abc
+MK: ABC
# End of testinput5
--
2.14.4

View File

@ -1,29 +0,0 @@
From e3ac8929b8152e6a30eff90f791b76339e44d91b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Mon, 19 Feb 2018 17:00:45 +0000
Subject: [PATCH 2/2] Oops, forgot about "-C bsr" in previous patch.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@917 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2test.c | 2 +-
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 40e2161..ad3db2c 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -7793,7 +7793,7 @@ if (arg != NULL && arg[0] != CHAR_MINUS)
{
case CONF_BSR:
(void)PCRE2_CONFIG(coptlist[i].value, &optval);
- printf("%s\n", optval? "ANYCRLF" : "ANY");
+ printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
break;
case CONF_FIX:
--
2.13.6

View File

@ -1,65 +0,0 @@
From c82ef7bef66138a85362473df622d16bf728499e Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sun, 4 Mar 2018 15:13:37 +0000
Subject: [PATCH] Set error offset zero for early errors in
pcre2_pattern_convert().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@926 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.31.
---
src/pcre2_convert.c | 12 +++++++++---
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c
index bdf9b86..1dd5c33 100644
--- a/src/pcre2_convert.c
+++ b/src/pcre2_convert.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2017 University of Cambridge
+ New API code Copyright (c) 2016-2018 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
uint32_t pattype = options & TYPE_OPTIONS;
if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
+
if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
(pattype & (~pattype+1)) != pattype || /* More than one type set */
pattype == 0) /* No type set */
{
- *bufflenptr = 0; /* Error offset */
+ *bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_BADOPTION;
}
@@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext =
/* Check UTF if required. */
#ifndef SUPPORT_UNICODE
-if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+if (utf)
+ {
+ *bufflenptr = 0; /* Error offset */
+ return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+ }
#else
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
{
@@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++)
break;
default:
+ *bufflenptr = 0; /* Error offset */
return PCRE2_ERROR_INTERNAL;
}
--
2.14.3

View File

@ -6,10 +6,10 @@
%bcond_with pcre2_enables_sealloc %bcond_with pcre2_enables_sealloc
# This is stable release: # This is stable release:
#%%global rcversion RC1 %global rcversion RC1
Name: pcre2 Name: pcre2
Version: 10.31 Version: 10.32
Release: %{?rcversion:0.}9%{?rcversion:.%rcversion}%{?dist} Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion} %global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library Summary: Perl-compatible regular expression library
# the library: BSD with exceptions # the library: BSD with exceptions
@ -49,47 +49,6 @@ URL: http://www.pcre.org/
Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2 Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2
# Do not set RPATH if libdir is not /usr/lib # Do not set RPATH if libdir is not /usr/lib
Patch0: pcre2-10.10-Fix-multilib.patch Patch0: pcre2-10.10-Fix-multilib.patch
# Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero
# starting offset, upstream bug #2244, in upstream after 10.31
Patch1: pcre2-10.31-Fix-the-value-passed-back-for-POSIX-unset-groups-whe.patch
# 1/2 Fix pcre2test -C to correctly show what \R matches,
# in upstream after 10.31
Patch2: pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch
# 2/2 Fix pcre2test -C to correctly show what \R matches,
# in upstream after 10.31
Patch3: pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch
# Fix matching repeated character classes against an 8-bit string containing
# multi-code-unit characters, in upstream after 10.31
Patch4: pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch
# Add support to pcre2grep for binary zeros in -f files, upstream bug #2222,
# in upstream after 10.31
Patch5: pcre2-10.31-Add-support-to-pcre2grep-for-binary-zeros-in-f-files.patch
# Fix compiler warnings in pcre2grep, in upstream after 10.31
Patch6: pcre2-10.31-A-small-fix-to-pcre2grep-to-avoid-compiler-warnings-.patch
# Fix setting error offset zero for early errors in pcre2_pattern_convert(),
# in upstream after 10.31
Patch7: pcre2-10.31-Set-error-offset-zero-for-early-errors-in-pcre2_patt.patch
# Fix bug when \K is used in a lookbehind in a substitute pattern,
# in upstream after 10.31
Patch8: pcre2-10.31-Fix-bug-when-K-is-used-in-a-lookbehind-in-a-substitu.patch
# Fix global search/replace in pcre2test and pcre2_substitute() when the pattern
# matches an empty string, but never at the starting offset,
# in upstream after 10.31
Patch9: pcre2-10.31-Fix-global-search-replace-in-pcre2test-and-pcre2_sub.patch
# Fix checking that a lookbehind assertion has a fixed length if the
# lookbehind assertion is used inside a lookahead assertion,
# in upstream after 10.31
Patch10: pcre2-10.31-Ignore-qualifiers-on-lookaheads-within-lookbehinds-w.patch
# Fix parsing VERSION conditions, in upstream after pcre-10.31
Patch11: pcre2-10.31-Fix-bug-in-VERSION-number-reading.patch
# Fix backtracking atomic groups when they are not separated by something with
# a backtracking point, in upstream after 10.31
Patch12: pcre2-10.31-Fixed-atomic-group-backtracking-bug.patch
# Recognize all Unicode space characters with /x option in a pattern,
# in upstream after 10.31
Patch13: pcre2-10.31-Make-x-more-Perl-compatible-by-recognizing-all-of-Un.patch
# Fix changing dynamic options, in upstream after 10.31
Patch14: pcre2-10.31-Fix-dynamic-options-changing-bug.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: coreutils BuildRequires: coreutils
@ -166,20 +125,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%prep %prep
%setup -q -n %{name}-%{myversion} %setup -q -n %{name}-%{myversion}
%patch0 -p1 %patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%patch13 -p1
%patch14 -p1
# Because of multilib patch # Because of multilib patch
libtoolize --copy --force libtoolize --copy --force
autoreconf -vif autoreconf -vif
@ -282,6 +227,9 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.* %{_mandir}/man1/pcre2test.*
%changelog %changelog
* Thu Aug 16 2018 Petr Pisar <ppisar@redhat.com> - 10.32-0.1.RC1
- 10.32-RC1 bump
* Thu Aug 16 2018 Petr Pisar <ppisar@redhat.com> - 10.31-9 * Thu Aug 16 2018 Petr Pisar <ppisar@redhat.com> - 10.31-9
- Recognize all Unicode space characters with /x option in a pattern - Recognize all Unicode space characters with /x option in a pattern
- Fix changing dynamic options - Fix changing dynamic options

View File

@ -1 +1 @@
SHA512 (pcre2-10.31.tar.bz2) = 44d7db2513d9415dcdf6541366fea585e016f572f3e4379f6e959a38114b2337851092049ab4a1576ae8f19b9de413edbcfa62f434c77fc8470747ee5413e967 SHA512 (pcre2-10.32-RC1.tar.bz2) = 387d9060eef3553d254b48d510859f028eed0f6fbdc7b5067b7d84ec81ed9356972defdb97dce5f28e6188421336f77052700eb530caf8c6e245b079b8258558