diff --git a/.gitignore b/.gitignore index 1db9300..75e2a59 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ /pcre2-10.31-RC1.tar.bz2 /pcre2-10.31.tar.bz2 /pcre2-10.32-RC1.tar.bz2 +/pcre2-10.32.tar.bz2 diff --git a/pcre-10.32-RC1-Fix-bad-auto-possessification-of-certain-types-of-cl.patch b/pcre-10.32-RC1-Fix-bad-auto-possessification-of-certain-types-of-cl.patch deleted file mode 100644 index 62d0ba3..0000000 --- a/pcre-10.32-RC1-Fix-bad-auto-possessification-of-certain-types-of-cl.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 6f2408cec76565406fb17f2cd51e88f92d8dc0b3 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Fri, 17 Aug 2018 14:45:35 +0000 -Subject: [PATCH] Fix bad auto-possessification of certain types of class. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@986 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.32-RC1. ---- - src/pcre2_auto_possess.c | 4 ++-- - testdata/testinput4 | 9 +++++++++ - testdata/testoutput4 | 12 ++++++++++++ - -diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c -index 82a43ae..2ce152e 100644 ---- a/src/pcre2_auto_possess.c -+++ b/src/pcre2_auto_possess.c -@@ -505,7 +505,7 @@ Arguments: - utf TRUE in UTF mode - cb compile data block - base_list the data list of the base opcode -- base_end the end of the data list -+ base_end the end of the base opcode - rec_limit points to recursion depth counter - - Returns: TRUE if the auto-possessification is possible -@@ -730,7 +730,7 @@ for(;;) - if ((*xclass_flags & XCL_MAP) == 0) - { - /* No bits are set for characters < 256. */ -- if (list[1] == 0) return TRUE; -+ if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0; - /* Might be an empty repeat. */ - continue; - } -diff --git a/testdata/testinput4 b/testdata/testinput4 -index 3efb1ff..a27b6af 100644 ---- a/testdata/testinput4 -+++ b/testdata/testinput4 -@@ -2309,4 +2309,13 @@ - - # ------- - -+/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf -+ \x{99}\x{99}\x{99} -+ -+/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf -+ \x{99}\x{99}\x{99} -+ -+/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf -+ \x{99}\x{99}\x{99} -+ - # End of testinput4 -diff --git a/testdata/testoutput4 b/testdata/testoutput4 -index db474ef..ba3df37 100644 ---- a/testdata/testoutput4 -+++ b/testdata/testoutput4 -@@ -3730,4 +3730,16 @@ No match - - # ------- - -+/[^\x{100}-\x{ffff}]*[\x80-\xff]/utf -+ \x{99}\x{99}\x{99} -+ 0: \x{99}\x{99}\x{99} -+ -+/[^\x{100}-\x{ffff}ABC]*[\x80-\xff]/utf -+ \x{99}\x{99}\x{99} -+ 0: \x{99}\x{99}\x{99} -+ -+/[^\x{100}-\x{ffff}]*[\x80-\xff]/i,utf -+ \x{99}\x{99}\x{99} -+ 0: \x{99}\x{99}\x{99} -+ - # End of testinput4 --- -2.14.4 - diff --git a/pcre2-10.32-RC1-Fix-anchoring-bug-in-conditionals-with-only-one-bran.patch b/pcre2-10.32-RC1-Fix-anchoring-bug-in-conditionals-with-only-one-bran.patch deleted file mode 100644 index 3678e39..0000000 --- a/pcre2-10.32-RC1-Fix-anchoring-bug-in-conditionals-with-only-one-bran.patch +++ /dev/null @@ -1,136 +0,0 @@ -From c2d378421fedba48cb02b6b3dc4a74d2a24b2dd4 Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sun, 2 Sep 2018 16:53:29 +0000 -Subject: [PATCH] Fix anchoring bug in conditionals with only one branch. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@995 6239d852-aaf2-0410-a92c-79f79f948069 -Petr Písař: Ported to 10.32-RC1. ---- - src/pcre2_compile.c | 11 ++++++----- - testdata/testinput2 | 15 +++++++++++++++ - testdata/testoutput2 | 40 ++++++++++++++++++++++++++++++++++++++++ - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index f6a7e99..3df55e9 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -1454,8 +1454,8 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) - /* \N{U+ can be handled by the \x{ code. However, this construction is - not valid in EBCDIC environments because it specifies a Unicode - character, not a codepoint in the local code. For example \N{U+0041} -- must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode -- casing semantics for the entire pattern, so allow it only in UTF (i.e. -+ must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode -+ casing semantics for the entire pattern, so allow it only in UTF (i.e. - Unicode) mode. */ - - if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) -@@ -1464,12 +1464,12 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) - *errorcodeptr = ERR93; - #else - if (utf) -- { -+ { - ptr = p + 1; - escape = 0; /* Not a fancy escape after all */ - goto COME_FROM_NU; - } -- else *errorcodeptr = ERR93; -+ else *errorcodeptr = ERR93; - #endif - } - -@@ -7864,10 +7864,11 @@ do { - if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE; - } - -- /* Condition */ -+ /* Condition. If there is no second branch, it can't be anchored. */ - - else if (op == OP_COND) - { -+ if (scode[GET(scode,1)] != OP_ALT) return FALSE; - if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) - return FALSE; - } -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 9b59b3e..c0f4292 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -5459,4 +5459,19 @@ a)"xI - - /(?x-i-i)/ - -+/(?(?=^))b/I -+ abc -+ -+/(?(?=^)|)b/I -+ abc -+ -+/(?(?=^)|^)b/I -+ bbc -+\= Expect no match -+ abc -+ -+/(?(1)^|^())/I -+ -+/(?(1)^())b/I -+ - # End of testinput2 -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index d629771..6f0dd12 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -16631,6 +16631,46 @@ Failed: error 194 at offset 3: invalid hyphen in option setting - /(?x-i-i)/ - Failed: error 194 at offset 5: invalid hyphen in option setting - -+/(?(?=^))b/I -+Capturing subpattern count = 0 -+Last code unit = 'b' -+Subject length lower bound = 1 -+ abc -+ 0: b -+ -+/(?(?=^)|)b/I -+Capturing subpattern count = 0 -+First code unit = 'b' -+Subject length lower bound = 1 -+ abc -+ 0: b -+ -+/(?(?=^)|^)b/I -+Capturing subpattern count = 0 -+Compile options: -+Overall options: anchored -+First code unit = 'b' -+Subject length lower bound = 1 -+ bbc -+ 0: b -+\= Expect no match -+ abc -+No match -+ -+/(?(1)^|^())/I -+Capturing subpattern count = 1 -+Max back reference = 1 -+May match empty string -+Compile options: -+Overall options: anchored -+Subject length lower bound = 0 -+ -+/(?(1)^())b/I -+Capturing subpattern count = 1 -+Max back reference = 1 -+Last code unit = 'b' -+Subject length lower bound = 1 -+ - # End of testinput2 - Error -70: PCRE2_ERROR_BADDATA (unknown error number) - Error -62: bad serialized data --- -2.14.4 - diff --git a/pcre2-10.32-RC1-Lock-out-N-U-hhhh-in-non-UTF-non-Unicode-modes.patch b/pcre2-10.32-RC1-Lock-out-N-U-hhhh-in-non-UTF-non-Unicode-modes.patch deleted file mode 100644 index f1db220..0000000 --- a/pcre2-10.32-RC1-Lock-out-N-U-hhhh-in-non-UTF-non-Unicode-modes.patch +++ /dev/null @@ -1,258 +0,0 @@ -From 11ad23b7e6f2b3b81bc7e1e605e34416b5b900ff Mon Sep 17 00:00:00 2001 -From: ph10 -Date: Sun, 2 Sep 2018 16:03:27 +0000 -Subject: [PATCH] Lock out \N{U+hhhh} in non-UTF (non-Unicode) modes. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@994 6239d852-aaf2-0410-a92c-79f79f948069 -Signed-off-by: Petr Písař ---- - ChangeLog | 2 +- - doc/pcre2api.3 | 5 +++-- - doc/pcre2pattern.3 | 16 +++++++++------- - doc/pcre2syntax.3 | 6 +++--- - doc/pcre2unicode.3 | 13 +++++++++---- - src/pcre2.h.in | 2 +- - src/pcre2_compile.c | 14 ++++++++++---- - src/pcre2_error.c | 2 +- - testdata/testinput5 | 2 ++ - testdata/testoutput5 | 3 +++ - 10 files changed, 42 insertions(+), 23 deletions(-) - -diff --git a/ChangeLog b/ChangeLog -index 264bfae..4b55639 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -130,7 +130,7 @@ present. - 28. A (*MARK) name was not being passed back for positive assertions that were - terminated by (*ACCEPT). - --29. Add support for \N{U+dddd}, but not in EBCDIC environments. -+29. Add support for \N{U+dddd}, but only in Unicode mode. - - 30. Add support for (?^) for unsetting all imnsx options. - -diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 -index 5a97739..fff4fcc 100644 ---- a/doc/pcre2api.3 -+++ b/doc/pcre2api.3 -@@ -1,4 +1,4 @@ --.TH PCRE2API 3 "03 August 2018" "PCRE2 10.32" -+.TH PCRE2API 3 "02 September 2018" "PCRE2 10.32" - .SH NAME - PCRE2 - Perl-compatible regular expressions (revised API) - .sp -@@ -1756,7 +1756,8 @@ behaviour of PCRE2 are given in the - .\" HREF - \fBpcre2unicode\fP - .\" --page. -+page. In particular, note that it changes the way PCRE2_CASELESS handles -+characters with code points greater than 127. - . - . - .\" HTML -diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 -index aa0d652..fd086b4 100644 ---- a/doc/pcre2pattern.3 -+++ b/doc/pcre2pattern.3 -@@ -1,4 +1,4 @@ --.TH PCRE2PATTERN 3 "03 August 2018" "PCRE2 10.32" -+.TH PCRE2PATTERN 3 "02 September 2018" "PCRE2 10.32" - .SH NAME - PCRE2 - Perl-compatible regular expressions (revised API) - .SH "PCRE2 REGULAR EXPRESSION DETAILS" -@@ -376,14 +376,15 @@ these escapes are as follows: - \eddd character with octal code ddd, or backreference - \eo{ddd..} character with octal code ddd.. - \exhh character with hex code hh -- \ex{hhh..} character with hex code hhh.. (default mode) -- \eN{U+hhh..} character with Unicode code point hhh.. -+ \ex{hhh..} character with hex code hhh.. -+ \eN{U+hhh..} character with Unicode hex code point hhh.. - \euhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set) - .sp -+The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option -+is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses -+\eN{name} to specify characters by Unicode name; PCRE2 does not support this. - Note that when \eN is not followed by an opening brace (curly bracket) it has - an entirely different meaning, matching any character that is not a newline. --Perl also uses \eN{name} to specify characters by Unicode name; PCRE2 does not --support this. - .P - The precise effect of \ecx on ASCII characters is as follows: if x is a lower - case letter, it is converted to upper case. Then bit 6 of the character (hex -@@ -509,7 +510,8 @@ limited to certain values, as follows: - Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the - so-called "surrogate" code points). The check for these can be disabled by the - caller of \fBpcre2_compile()\fP by setting the option --PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. -+PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8 -+and UTF-32 modes, because these values are not representable in UTF-16. - . - . - .SS "Escape sequences in character classes" -@@ -3650,6 +3652,6 @@ Cambridge, England. - .rs - .sp - .nf --Last updated: 03 August 2018 -+Last updated: 02 September 2018 - Copyright (c) 1997-2018 University of Cambridge. - .fi -diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 -index e5ace25..c392bfb 100644 ---- a/doc/pcre2syntax.3 -+++ b/doc/pcre2syntax.3 -@@ -1,4 +1,4 @@ --.TH PCRE2SYNTAX 3 "01 August 2018" "PCRE2 10.32" -+.TH PCRE2SYNTAX 3 "02 September 2018" "PCRE2 10.32" - .SH NAME - PCRE2 - Perl-compatible regular expressions (revised API) - .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" -@@ -35,7 +35,7 @@ This table applies to ASCII and Unicode environments. - \eddd character with octal code ddd, or backreference - \eo{ddd..} character with octal code ddd.. - \eU "U" if PCRE2_ALT_BSUX is set (otherwise is an error) -- \eN{U+hh..} character with Unicode code point hh.. -+ \eN{U+hh..} character with Unicode code point hh.. (Unicode mode only) - \euhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set) - \exhh character with hex code hh - \ex{hh..} character with hex code hh.. -@@ -621,6 +621,6 @@ Cambridge, England. - .rs - .sp - .nf --Last updated: 01 August 2018 -+Last updated: 02 September 2018 - Copyright (c) 1997-2018 University of Cambridge. - .fi -diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 -index fdaddc4..edd8dcd 100644 ---- a/doc/pcre2unicode.3 -+++ b/doc/pcre2unicode.3 -@@ -1,4 +1,4 @@ --.TH PCRE2UNICODE 3 "17 May 2017" "PCRE2 10.30" -+.TH PCRE2UNICODE 3 "02 September 2018" "PCRE2 10.32" - .SH NAME - PCRE - Perl-compatible regular expressions (revised API) - .SH "UNICODE AND UTF SUPPORT" -@@ -16,7 +16,8 @@ you must call - with the PCRE2_UTF option flag, or the pattern must start with the sequence - (*UTF). When either of these is the case, both the pattern and any subject - strings that are matched against it are treated as UTF strings instead of --strings of individual one-code-unit characters. -+strings of individual one-code-unit characters. There are also some other -+changes to the way characters are handled, as documented below. - .P - If you do not need Unicode support you can build PCRE2 without it, in which - case the library will be smaller. -@@ -51,6 +52,10 @@ unbraced hexadecimal escape sequences (for example, \ex{b3} or \exb3). Larger - values have to use braced sequences. Unbraced octal code points up to \e777 are - also recognized; larger ones can be coded using \eo{...}. - .P -+The escape sequence \eN{U+} is recognized as another way of -+specifying a Unicode character by code point in a UTF mode. It is not allowed -+in non-UTF modes. -+.P - In UTF modes, repeat quantifiers apply to complete UTF characters, not to - individual code units. - .P -@@ -280,6 +285,6 @@ Cambridge, England. - .rs - .sp - .nf --Last updated: 17 May 2017 --Copyright (c) 1997-2017 University of Cambridge. -+Last updated: 02 September 2018 -+Copyright (c) 1997-2018 University of Cambridge. - .fi -diff --git a/src/pcre2.h.in b/src/pcre2.h.in -index 4c7c674..a9396e0 100644 ---- a/src/pcre2.h.in -+++ b/src/pcre2.h.in -@@ -316,7 +316,7 @@ pcre2_pattern_convert(). */ - #define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190 - #define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191 - #define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192 --#define PCRE2_ERROR_NOT_SUPPORTED_IN_EBCDIC 193 -+#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193 - #define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194 - - -diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 8c30064..f6a7e99 100644 ---- a/src/pcre2_compile.c -+++ b/src/pcre2_compile.c -@@ -1454,16 +1454,22 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) - /* \N{U+ can be handled by the \x{ code. However, this construction is - not valid in EBCDIC environments because it specifies a Unicode - character, not a codepoint in the local code. For example \N{U+0041} -- must be "A" in all environments. */ -+ must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode -+ casing semantics for the entire pattern, so allow it only in UTF (i.e. -+ Unicode) mode. */ - - if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS) - { - #ifdef EBCDIC - *errorcodeptr = ERR93; - #else -- ptr = p + 1; -- escape = 0; /* Not a fancy escape after all */ -- goto COME_FROM_NU; -+ if (utf) -+ { -+ ptr = p + 1; -+ escape = 0; /* Not a fancy escape after all */ -+ goto COME_FROM_NU; -+ } -+ else *errorcodeptr = ERR93; - #endif - } - -diff --git a/src/pcre2_error.c b/src/pcre2_error.c -index 64ab5d9..4b3b3f1 100644 ---- a/src/pcre2_error.c -+++ b/src/pcre2_error.c -@@ -179,7 +179,7 @@ static const unsigned char compile_error_texts[] = - "internal error: bad code value in parsed_skip()\0" - "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" - "invalid option bits with PCRE2_LITERAL\0" -- "\\N{U+dddd} is not supported in EBCDIC mode\0" -+ "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" - "invalid hyphen in option setting\0" - ; - -diff --git a/testdata/testinput5 b/testdata/testinput5 -index e339808..687de32 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -2089,6 +2089,8 @@ - - /\N{U+}/ - -+/\N{U+}/utf -+ - /\N{U}/ - - # This tests the non-UTF Unicode NEL pattern whitespace character, only -diff --git a/testdata/testoutput5 b/testdata/testoutput5 -index a9552f4..51caa18 100644 ---- a/testdata/testoutput5 -+++ b/testdata/testoutput5 -@@ -4751,6 +4751,9 @@ No match - 0: \x{1d1aa} - - /\N{U+}/ -+Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode -+ -+/\N{U+}/utf - Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} - - /\N{U}/ --- -2.14.4 - diff --git a/pcre2.spec b/pcre2.spec index d1453a3..d88056b 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -6,10 +6,10 @@ %bcond_with pcre2_enables_sealloc # This is stable release: -%global rcversion RC1 +#%%global rcversion RC1 Name: pcre2 Version: 10.32 -Release: %{?rcversion:0.}3%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -49,16 +49,6 @@ URL: http://www.pcre.org/ Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/%{?rcversion:Testing/}%{name}-%{myversion}.tar.bz2 # Do no set RPATH if libdir is not /usr/lib Patch0: pcre2-10.10-Fix-multilib.patch -# Fix autopossessifying a repeated negative class with no characters less than -# 256 that is followed by a positive class with only characters less than 256, -# upstream bug #2300, in upstream after 10.32-RC1 -Patch1: pcre-10.32-RC1-Fix-bad-auto-possessification-of-certain-types-of-cl.patch -# Accept \N{U+hhhh} only in UTF mode, upstream bug #2305, -# in upstream after 10.32-RC1 -Patch2: pcre2-10.32-RC1-Lock-out-N-U-hhhh-in-non-UTF-non-Unicode-modes.patch -# Fix anchoring in conditionals with only one branch, upstream bug #2307, -# in upstream after 10.32-RC1 -Patch3: pcre2-10.32-RC1-Fix-anchoring-bug-in-conditionals-with-only-one-bran.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -135,9 +125,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %prep %setup -q -n %{name}-%{myversion} %patch0 -p1 -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -240,6 +227,9 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Wed Sep 12 2018 Petr Pisar - 10.32-1 +- 10.32 bump + * Mon Sep 03 2018 Petr Pisar - 10.32-0.3.RC1 - Accept \N{U+hhhh} only in UTF mode (upstream bug #2305) - Fix anchoring in conditionals with only one branch (upstream bug #2307) diff --git a/sources b/sources index d1bf8c8..3861595 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (pcre2-10.32-RC1.tar.bz2) = 387d9060eef3553d254b48d510859f028eed0f6fbdc7b5067b7d84ec81ed9356972defdb97dce5f28e6188421336f77052700eb530caf8c6e245b079b8258558 +SHA512 (pcre2-10.32.tar.bz2) = 3d712e7d6afbaef227952680ded69fadbdec0b1abdfcac17cf071415c3c93edd152d6d268dec80570aedf24f82ba954f74f5f9e62c9b9991be75080a3d6ab5fe