diff --git a/pcre2-10.23-Fix-matching-offsets-from-regexec-in-the-POSIX-wrapp.patch b/pcre2-10.23-Fix-matching-offsets-from-regexec-in-the-POSIX-wrapp.patch new file mode 100644 index 0000000..8c7f918 --- /dev/null +++ b/pcre2-10.23-Fix-matching-offsets-from-regexec-in-the-POSIX-wrapp.patch @@ -0,0 +1,231 @@ +From 398087c31c9eeb80eecdc74440ada9ebe8e2a6d7 Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Sat, 3 Jun 2017 16:42:58 +0000 +Subject: [PATCH] Fix matching offsets from regexec() in the POSIX wrapper when + called with REG_STARTEND and a starting offset greater than zero. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Petr Písař: Ported to 10.23: + +commit 4ed24ba49fc4a584c58509177e5a3ad6d1a000e4 +Author: ph10 +Date: Sat Jun 3 16:42:58 2017 +0000 + + Fix matching offsets from regexec() in the POSIX wrapper when called with + REG_STARTEND and a starting offset greater than zero. + + git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@818 6239d852-aaf2-0410-a92c-79f79f9480 +69 + +Signed-off-by: Petr Písař +--- + doc/pcre2posix.3 | 30 ++++++++++++++++++------------ + doc/pcre2test.1 | 14 ++++++++++++++ + src/pcre2posix.c | 4 ++-- + src/pcre2test.c | 16 +++++++++++++++- + testdata/testinput18 | 10 ++++++++++ + testdata/testoutput18 | 17 +++++++++++++++++ + 6 files changed, 76 insertions(+), 15 deletions(-) + +diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3 +index 70a86d8..b37046b 100644 +--- a/doc/pcre2posix.3 ++++ b/doc/pcre2posix.3 +@@ -1,4 +1,4 @@ +-.TH PCRE2POSIX 3 "31 January 2016" "PCRE2 10.22" ++.TH PCRE2POSIX 3 "03 June 2017" "PCRE2 10.30" + .SH NAME + PCRE2 - Perl-compatible regular expressions (revised API) + .SH "SYNOPSIS" +@@ -204,15 +204,21 @@ function. 
+ .sp + REG_STARTEND + .sp +-The string is considered to start at \fIstring\fP + \fIpmatch[0].rm_so\fP and +-to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP +-(there need not actually be a NUL at that location), regardless of the value of +-\fInmatch\fP. This is a BSD extension, compatible with but not specified by +-IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software +-intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does +-not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not +-how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are +-mutually exclusive; the error REG_INVARG is returned. ++When this option is set, the string is considered to start at \fIstring\fP + ++\fIpmatch[0].rm_so\fP and to have a terminating NUL located at \fIstring\fP + ++\fIpmatch[0].rm_eo\fP (there need not actually be a NUL at that location), ++regardless of the value of \fInmatch\fP. However, the offsets of the matched ++string and any captured substrings are still given relative to the start of ++\fIstring\fP. (Before PCRE2 release 10.30 these were given relative to ++\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other ++implementations.) ++.P ++This is a BSD extension, compatible with but not specified by IEEE Standard ++1003.2 (POSIX.2), and should be used with caution in software intended to be ++portable to other systems. Note that a non-zero \fIrm_so\fP does not imply ++REG_NOTBOL; REG_STARTEND affects only the location of the string, not how it is ++matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are mutually ++exclusive; the error REG_INVARG is returned. + .P + If the pattern was compiled with the REG_NOSUB flag, no data about any matched + strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of +@@ -271,6 +277,6 @@ Cambridge, England. 
+ .rs + .sp + .nf +-Last updated: 31 January 2016 +-Copyright (c) 1997-2016 University of Cambridge. ++Last updated: 03 June 2017 ++Copyright (c) 1997-2017 University of Cambridge. + .fi +diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 +index bd7383e..f8ce5bf 100644 +--- a/doc/pcre2test.1 ++++ b/doc/pcre2test.1 +@@ -1023,6 +1023,20 @@ wrapper API to be used, the only option-setting modifiers that have any effect + are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL, + REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP. + The other modifiers are ignored, with a warning message. ++.P ++There is one additional modifier that can be used with the POSIX wrapper. It is ++ignored (with a warning) if used for non-POSIX matching. ++.sp ++ posix_startend=<n>[:<m>] ++.sp ++This causes the subject string to be passed to \fBregexec()\fP using the ++REG_STARTEND option, which uses offsets to restrict which part of the string is ++searched. If only one number is given, the end offset is passed as the end of ++the subject string. For more detail of REG_STARTEND, see the ++.\" HREF ++\fBpcre2posix\fP ++.\" ++documentation. + . + . 
+ .SS "Setting match controls" +diff --git a/src/pcre2posix.c b/src/pcre2posix.c +index 4ecc701..8be969a 100644 +--- a/src/pcre2posix.c ++++ b/src/pcre2posix.c +@@ -338,8 +338,8 @@ if (rc >= 0) + if ((size_t)rc > nmatch) rc = (int)nmatch; + for (i = 0; i < (size_t)rc; i++) + { +- pmatch[i].rm_so = ovector[i*2]; +- pmatch[i].rm_eo = ovector[i*2+1]; ++ pmatch[i].rm_so = ovector[i*2] + so; ++ pmatch[i].rm_eo = ovector[i*2+1] + so; + } + for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + return 0; +diff --git a/src/pcre2test.c b/src/pcre2test.c +index af05b68..ecb6811 100644 +--- a/src/pcre2test.c ++++ b/src/pcre2test.c +@@ -182,7 +182,7 @@ void vms_setsymbol( char *, char *, int ); + #endif + #endif + +-#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */ ++#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */ + #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ + #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ + #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ +@@ -502,6 +502,7 @@ typedef struct datctl { /* Structure for data line modifiers. 
*/ + uint32_t control; /* Must be in same position as patctl */ + uint32_t control2; /* Must be in same position as patctl */ + uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ ++ uint32_t startend[2]; + uint32_t cerror[2]; + uint32_t cfail[2]; + int32_t callout_data; +@@ -615,6 +616,7 @@ static modstruct modlist[] = { + { "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) }, + { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, + { "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) }, ++ { "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) }, + { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, + { "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) }, + { "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) }, +@@ -6193,6 +6195,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0) + } + } + ++ if (dat_datctl.startend[0] != CFORE_UNSET) ++ { ++ pmatch[0].rm_so = dat_datctl.startend[0]; ++ pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)? ++ dat_datctl.startend[1] : len; ++ eflags |= REG_STARTEND; ++ } ++ + if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL; + if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL; + if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; +@@ -6252,6 +6262,9 @@ if ((dat_datctl.control & (CTL_DFA|CTL_FINDLIMITS)) == (CTL_DFA|CTL_FINDLIMITS)) + dat_datctl.control &= ~CTL_FINDLIMITS; + } + ++if (dat_datctl.startend[0] != CFORE_UNSET) ++ fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n"); ++ + /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA + matching, even if the JIT compiler was used. 
*/ + +@@ -7407,6 +7420,7 @@ memset(&def_datctl, 0, sizeof(datctl)); + def_datctl.oveccount = DEFAULT_OVECCOUNT; + def_datctl.copy_numbers[0] = -1; + def_datctl.get_numbers[0] = -1; ++def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET; + def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; + def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; + +diff --git a/testdata/testinput18 b/testdata/testinput18 +index bd1c6ad..d8d8084 100644 +--- a/testdata/testinput18 ++++ b/testdata/testinput18 +@@ -109,4 +109,14 @@ + /(?=(a\K))/ + a + ++/^d(e)$/posix ++ acdef\=posix_startend=2:4 ++ acde\=posix_startend=2 ++\= Expect no match ++ acdef ++ acdef\=posix_startend=2 ++ ++/^a\x{00}b$/posix ++ a\x{00}b\=posix_startend=0:3 ++ + # End of testdata/testinput18 +diff --git a/testdata/testoutput18 b/testdata/testoutput18 +index fd6fac3..0c13abc 100644 +--- a/testdata/testoutput18 ++++ b/testdata/testoutput18 +@@ -168,4 +168,21 @@ Start of matched string is beyond its end - displaying from end to start. 
+ 0: a + 1: a + ++/^d(e)$/posix ++ acdef\=posix_startend=2:4 ++ 0: de ++ 1: e ++ acde\=posix_startend=2 ++ 0: de ++ 1: e ++\= Expect no match ++ acdef ++No match: POSIX code 17: match failed ++ acdef\=posix_startend=2 ++No match: POSIX code 17: match failed ++ ++/^a\x{00}b$/posix ++ a\x{00}b\=posix_startend=0:3 ++ 0: a\x00b ++ + # End of testdata/testinput18 +-- +2.9.4 + diff --git a/pcre2.spec b/pcre2.spec index 175e080..53538e3 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -84,6 +84,9 @@ Patch14: pcre2-10.23-Fix-crash-when-more-than-one-kind-of-push-was-set-in.pat # Fix DFA matching a lookbehind assertion that has a zero-length branch, # PCRE2 oss-fuzz issue 1859, in upstream after 10.23 Patch15: pcre2-10.23-Fix-lookbehind-with-zero-length-branch-in-DFA-matchi.patch +# Fix returned offsets from regexec() when REG_STARTEND is used with starting offset +# greater than zero, upstream bug #2128, in upstream after 10.23 +Patch16: pcre2-10.23-Fix-matching-offsets-from-regexec-in-the-POSIX-wrapp.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -175,6 +178,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch13 -p1 %patch14 -p1 %patch15 -p1 +%patch16 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -276,6 +280,8 @@ make %{?_smp_mflags} check VERBOSE=yes * Fri Jun 16 2017 Petr Pisar - 10.23-8 - Fix DFA matching a lookbehind assertion that has a zero-length branch (PCRE2 oss-fuzz issue 1859) +- Fix returned offsets from regexec() when REG_STARTEND is used with starting offset + greater than zero (upstream bug #2128) * Tue May 09 2017 Petr Pisar - 10.23-7 - Fix a pcre2test crash on multiple push statements (upstream bug #2109)