diff --git a/.gitignore b/.gitignore index 1920ae0..7ab33cd 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ pcre-8.10.tar.bz2 /pcre-8.21-RC1.tar.bz2 /pcre-8.21.tar.bz2 /pcre-8.30.tar.bz2 +/pcre-8.31-RC1.tar.bz2 diff --git a/pcre-8.30-Fix-look-behind-assertion-in-UTF-8-JIT-mode.patch b/pcre-8.30-Fix-look-behind-assertion-in-UTF-8-JIT-mode.patch deleted file mode 100644 index 6d5cabd..0000000 --- a/pcre-8.30-Fix-look-behind-assertion-in-UTF-8-JIT-mode.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 918ed08b4415c8f9a94c22588a328c712317dea9 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= -Date: Thu, 5 Apr 2012 17:30:16 +0200 -Subject: [PATCH] Fix look-behind assertion in UTF-8 JIT mode -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This is back-port of upstream commit for pcre-8.30: -r953 | zherczeg | 2012-03-29 19:41:57 +0200 (ÄŒt, 29 bÅ™e 2012) | 1 line -Fixed a bug for backward assertions with REVERSE 0 in the JIT compiler ---- - pcre_jit_compile.c | 3 ++- - testdata/testinput5 | 3 +++ - testdata/testoutput5 | 4 ++++ - 3 files changed, 9 insertions(+), 1 deletions(-) - -diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c -index 97d227c..dfb78a7 100644 ---- a/pcre_jit_compile.c -+++ b/pcre_jit_compile.c -@@ -3417,7 +3417,8 @@ switch(type) - - case OP_REVERSE: - length = GET(cc, 0); -- SLJIT_ASSERT(length > 0); -+ if (length == 0) -+ return cc + LINK_SIZE; - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - #ifdef SUPPORT_UTF - if (common->utf) -diff --git a/testdata/testinput5 b/testdata/testinput5 -index 4f7cb32..de41fce 100644 ---- a/testdata/testinput5 -+++ b/testdata/testinput5 -@@ -691,4 +691,7 @@ - \x{2027}\x{2030}\x{2028}\x{2029} - \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d - -+/(?= 0xd800 && <= 0xdfff) at offset 7 - \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d - 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} - -+/(? -Date: Mon, 23 Apr 2012 09:38:46 +0200 -Subject: [PATCH] Fix ovector overflow - -Back-port to 8.30: - -r963 | ph10 | 2012-04-21 20:06:31 +0200 (So, 21 dub 2012) | 3 lines -Fix ovector overrun when backreferences need temporary memory and the -highest block is not used. ---- - pcre_exec.c | 2 +- - pcretest.c | 1 + - testdata/testinput2 | 3 +++ - testdata/testoutput2 | 5 +++++ - 4 files changed, 10 insertions(+), 1 deletions(-) - -diff --git a/pcre_exec.c b/pcre_exec.c -index b715353..85e12df 100644 ---- a/pcre_exec.c -+++ b/pcre_exec.c -@@ -6887,7 +6887,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) - { - register int *iptr, *iend; - int resetcount = 2 + re->top_bracket * 2; -- if (resetcount > offsetcount) resetcount = ocount; -+ if (resetcount > offsetcount) resetcount = offsetcount; - iptr = offsets + md->end_offset_top; - iend = offsets + resetcount; - while (iptr < iend) *iptr++ = -1; -diff --git a/pcretest.c b/pcretest.c -index 1974453..4eee0ed 100644 ---- a/pcretest.c -+++ b/pcretest.c -@@ -3653,6 +3653,7 @@ while (!done) - } - use_size_offsets = n; - if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */ -+ else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */ - continue; - - case 'P': -diff --git a/testdata/testinput2 b/testdata/testinput2 -index 5cc61e0..0c746da 100644 ---- a/testdata/testinput2 -+++ b/testdata/testinput2 -@@ -3619,4 +3619,7 @@ replaced by single letters. --/ - /(?=a(*:x))(?=a(*:y)c|)/K+ - ab - -+"AB(C(D))(E(F))?(?(?=\2)(?=\4))" -+ ABCDGHI\O03 -+ - /-- End of testinput2 --/ -diff --git a/testdata/testoutput2 b/testdata/testoutput2 -index 90cfa4f..a9093fd 100644 ---- a/testdata/testoutput2 -+++ b/testdata/testoutput2 -@@ -12076,4 +12076,9 @@ MK: x - 0+ ab - MK: x - -+"AB(C(D))(E(F))?(?(?=\2)(?=\4))" -+ ABCDGHI\O03 -+Matched, but too many substrings -+ 0: ABCD -+ - /-- End of testinput2 --/ --- -1.7.7.6 - diff --git a/pcre-8.30-fix_spelling_formfeed_runtime_whitespace.patch b/pcre-8.30-fix_spelling_formfeed_runtime_whitespace.patch deleted file mode 100644 index 5616e71..0000000 --- a/pcre-8.30-fix_spelling_formfeed_runtime_whitespace.patch +++ /dev/null @@ -1,319 +0,0 @@ -http://bugs.exim.org/show_bug.cgi?id=1245 - -Index: doc/pcrebuild.3 -=================================================================== ---- doc/pcrebuild.3 (revision 965) -+++ doc/pcrebuild.3 (working copy) -@@ -100,7 +100,7 @@ - functions. - .P - If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects --its input to be either ASCII or UTF-8 (depending on the runtime option). It is -+its input to be either ASCII or UTF-8 (depending on the run-time option). It is - not possible to support both EBCDIC and UTF-8 codes in the same version of the - library. Consequently, --enable-utf and --enable-ebcdic are mutually - exclusive. -@@ -313,7 +313,7 @@ - .sp - to the \fBconfigure\fP command, the distributed tables are no longer used. - Instead, a program called \fBdftables\fP is compiled and run. This outputs the --source for new set of tables, created in the default locale of your C runtime -+source for new set of tables, created in the default locale of your C run-time - system. (This method of replacing the tables does not work if you are cross - compiling, because \fBdftables\fP is run on the local host. If you need to - create alternative tables when cross compiling, you will have to do so "by -Index: doc/pcre_compile2.3 -=================================================================== ---- doc/pcre_compile2.3 (revision 965) -+++ doc/pcre_compile2.3 (working copy) -@@ -50,7 +50,7 @@ - PCRE_DOLLAR_ENDONLY $ not to match newline at end - PCRE_DOTALL . matches anything including NL - PCRE_DUPNAMES Allow duplicate names for subpatterns -- PCRE_EXTENDED Ignore whitespace and # comments -+ PCRE_EXTENDED Ignore white space and # comments - PCRE_EXTRA PCRE extra features - (not much use currently) - PCRE_FIRSTLINE Force matching to be before newline -Index: doc/pcreapi.3 -=================================================================== ---- doc/pcreapi.3 (revision 965) -+++ doc/pcreapi.3 (working copy) -@@ -302,7 +302,7 @@ - strings: a single CR (carriage return) character, a single LF (linefeed) - character, the two-character sequence CRLF, any of the three preceding, or any - Unicode newline sequence. The Unicode newline sequences are the three just --mentioned, plus the single characters VT (vertical tab, U+000B), FF (formfeed, -+mentioned, plus the single characters VT (vertical tab, U+000B), FF (form feed, - U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS - (paragraph separator, U+2029). - .P -@@ -642,8 +642,8 @@ - .sp - PCRE_EXTENDED - .sp --If this bit is set, whitespace data characters in the pattern are totally --ignored except when escaped or inside a character class. Whitespace does not -+If this bit is set, white space data characters in the pattern are totally -+ignored except when escaped or inside a character class. White space does not - include the VT character (code 11). In addition, characters between an - unescaped # outside a character class and the next newline, inclusive, are also - ignored. This is equivalent to Perl's /x option, and it can be changed within a -@@ -661,7 +661,7 @@ - happen to represent a newline do not count. - .P - This option makes it possible to include comments inside complicated patterns. --Note, however, that this applies only to data characters. Whitespace characters -+Note, however, that this applies only to data characters. White space characters - may never appear within special character sequences in a pattern, for example - within the sequence (?( that introduces a conditional subpattern. - .sp -@@ -741,7 +741,7 @@ - preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies - that any Unicode newline sequence should be recognized. The Unicode newline - sequences are the three just mentioned, plus the single characters VT (vertical --tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line -+tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line - separator, U+2028), and PS (paragraph separator, U+2029). For the 8-bit - library, the last two are recognized only in UTF-8 mode. - .P -@@ -753,7 +753,7 @@ - other combinations may yield unused numbers and cause an error. - .P - The only time that a line break in a pattern is specially recognized when --compiling is when PCRE_EXTENDED is set. CR and LF are whitespace characters, -+compiling is when PCRE_EXTENDED is set. CR and LF are white space characters, - and so are ignored in this mode. Also, an unescaped # outside a character class - indicates a comment that lasts until after the next line break sequence. In - other circumstances, line break sequences in patterns are treated as literal -Index: doc/pcrecpp.3 -=================================================================== ---- doc/pcrecpp.3 (revision 965) -+++ doc/pcrecpp.3 (working copy) -@@ -173,7 +173,7 @@ - PCRE_DOTALL dot matches newlines /s - PCRE_DOLLAR_ENDONLY $ matches only at end N/A - PCRE_EXTRA strict escape parsing N/A -- PCRE_EXTENDED ignore whitespaces /x -+ PCRE_EXTENDED ignore white spaces /x - PCRE_UTF8 handles UTF8 chars built-in - PCRE_UNGREEDY reverses * and *? N/A - PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*) -Index: doc/pcre_jit_stack_alloc.3 -=================================================================== ---- doc/pcre_jit_stack_alloc.3 (revision 965) -+++ doc/pcre_jit_stack_alloc.3 (working copy) -@@ -21,7 +21,7 @@ - This function is used to create a stack for use by the code compiled by the JIT - optimization of \fBpcre[16]_study()\fP. The arguments are a starting size for - the stack, and a maximum size to which it is allowed to grow. The result can be --passed to the JIT runtime code by \fBpcre[16]_assign_jit_stack()\fP, or that -+passed to the JIT run-time code by \fBpcre[16]_assign_jit_stack()\fP, or that - function can set up a callback for obtaining a stack. A maximum stack size of - 512K to 1M should be more than enough for any pattern. For more details, see - the -Index: doc/pcreunicode.3 -=================================================================== ---- doc/pcreunicode.3 (revision 965) -+++ doc/pcreunicode.3 (working copy) -@@ -85,7 +85,7 @@ - .P - If an invalid UTF-8 string is passed to PCRE, an error return is given. At - compile time, the only additional information is the offset to the first byte --of the failing character. The runtime functions \fBpcre_exec()\fP and -+of the failing character. The run-time functions \fBpcre_exec()\fP and - \fBpcre_dfa_exec()\fP also pass back this information, as well as a more - detailed reason code if the caller has provided memory in which to do this. - .P -@@ -127,7 +127,7 @@ - .P - If an invalid UTF-16 string is passed to PCRE, an error return is given. At - compile time, the only additional information is the offset to the first data --unit of the failing character. The runtime functions \fBpcre16_exec()\fP and -+unit of the failing character. The run-time functions \fBpcre16_exec()\fP and - \fBpcre16_dfa_exec()\fP also pass back this information, as well as a more - detailed reason code if the caller has provided memory in which to do this. - .P -@@ -192,7 +192,7 @@ - 7. Similarly, characters that match the POSIX named character classes are all - low-valued characters, unless the PCRE_UCP option is set. - .P --8. However, the horizontal and vertical whitespace matching escapes (\eh, \eH, -+8. However, the horizontal and vertical white space matching escapes (\eh, \eH, - \ev, and \eV) do match all the appropriate Unicode characters, whether or not - PCRE_UCP is set. - .P -Index: doc/pcre_compile.3 -=================================================================== ---- doc/pcre_compile.3 (revision 965) -+++ doc/pcre_compile.3 (working copy) -@@ -44,7 +44,7 @@ - PCRE_DOLLAR_ENDONLY $ not to match newline at end - PCRE_DOTALL . matches anything including NL - PCRE_DUPNAMES Allow duplicate names for subpatterns -- PCRE_EXTENDED Ignore whitespace and # comments -+ PCRE_EXTENDED Ignore white space and # comments - PCRE_EXTRA PCRE extra features - (not much use currently) - PCRE_FIRSTLINE Force matching to be before newline -Index: doc/pcrepattern.3 -=================================================================== ---- doc/pcrepattern.3 (revision 965) -+++ doc/pcrepattern.3 (working copy) -@@ -198,10 +198,10 @@ - backslash. All other characters (in particular, those whose codepoints are - greater than 127) are treated as literals. - .P --If a pattern is compiled with the PCRE_EXTENDED option, whitespace in the -+If a pattern is compiled with the PCRE_EXTENDED option, white space in the - pattern (other than in a character class) and characters between a # outside - a character class and the next newline are ignored. An escaping backslash can --be used to include a whitespace or # character as part of the pattern. -+be used to include a white space or # character as part of the pattern. - .P - If you want to remove the special meaning from a sequence of characters, you - can do so by putting them between \eQ and \eE. This is different from Perl in -@@ -237,7 +237,7 @@ - \ea alarm, that is, the BEL character (hex 07) - \ecx "control-x", where x is any ASCII character - \ee escape (hex 1B) -- \ef formfeed (hex 0C) -+ \ef form feed (hex 0C) - \en linefeed (hex 0A) - \er carriage return (hex 0D) - \et tab (hex 09) -@@ -399,12 +399,12 @@ - .sp - \ed any decimal digit - \eD any character that is not a decimal digit -- \eh any horizontal whitespace character -- \eH any character that is not a horizontal whitespace character -- \es any whitespace character -- \eS any character that is not a whitespace character -- \ev any vertical whitespace character -- \eV any character that is not a vertical whitespace character -+ \eh any horizontal white space character -+ \eH any character that is not a horizontal white space character -+ \es any white space character -+ \eS any character that is not a white space character -+ \ev any vertical white space character -+ \eV any character that is not a vertical white space character - \ew any "word" character - \eW any "non-word" character - .sp -@@ -493,7 +493,7 @@ - .sp - U+000A Linefeed - U+000B Vertical tab -- U+000C Formfeed -+ U+000C Form feed - U+000D Carriage return - U+0085 Next line - U+2028 Line separator -@@ -520,7 +520,7 @@ - .\" - This particular group matches either the two-character sequence CR followed by - LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab, --U+000B), FF (formfeed, U+000C), CR (carriage return, U+000D), or NEL (next -+U+000B), FF (form feed, U+000C), CR (carriage return, U+000D), or NEL (next - line, U+0085). The two-character sequence is treated as a single unit that - cannot be split. - .P -@@ -819,7 +819,7 @@ - Xwd Any Perl "word" character - .sp - Xan matches characters that have either the L (letter) or the N (number) --property. Xps matches the characters tab, linefeed, vertical tab, formfeed, or -+property. Xps matches the characters tab, linefeed, vertical tab, form feed, or - carriage return, and any other character that has the Z (separator) property. - Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the - same characters as Xan, plus underscore. -@@ -1843,7 +1843,7 @@ - following a backslash are taken as part of a potential back reference number. - If the pattern continues with a digit character, some delimiter must be used to - terminate the back reference. If the PCRE_EXTENDED option is set, this can be --whitespace. Otherwise, the \eg{ syntax or an empty comment (see -+white space. Otherwise, the \eg{ syntax or an empty comment (see - .\" HTML - .\" - "Comments" -@@ -2200,7 +2200,7 @@ - subroutines - .\" - is described below.) For example, a pattern to match an IPv4 address such as --"192.168.23.245" could be written like this (ignore whitespace and line -+"192.168.23.245" could be written like this (ignore white space and line - breaks): - .sp - (?(DEFINE) (? 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) ) -Index: doc/pcre_assign_jit_stack.3 -=================================================================== ---- doc/pcre_assign_jit_stack.3 (revision 965) -+++ doc/pcre_assign_jit_stack.3 (working copy) -@@ -18,7 +18,7 @@ - .SH DESCRIPTION - .rs - .sp --This function provides control over the memory used as a stack at runtime by a -+This function provides control over the memory used as a stack at run-time by a - call to \fBpcre[16]_exec()\fP with a pattern that has been successfully - compiled with JIT optimization. The arguments are: - .sp -Index: doc/pcrecompat.3 -=================================================================== ---- doc/pcrecompat.3 (revision 965) -+++ doc/pcrecompat.3 (working copy) -@@ -114,7 +114,7 @@ - .P - 14. Perl recognizes comments in some places that PCRE does not, for example, - between the ( and ? at the start of a subpattern. If the /x modifier is set, --Perl allows whitespace between ( and ? but PCRE never does, even if the -+Perl allows white space between ( and ? but PCRE never does, even if the - PCRE_EXTENDED option is set. - .P - 15. PCRE provides some extensions to the Perl regular expression facilities. -Index: doc/pcresyntax.3 -=================================================================== ---- doc/pcresyntax.3 (revision 965) -+++ doc/pcresyntax.3 (working copy) -@@ -25,7 +25,7 @@ - \ea alarm, that is, the BEL character (hex 07) - \ecx "control-x", where x is any ASCII character - \ee escape (hex 1B) -- \ef formfeed (hex 0C) -+ \ef form feed (hex 0C) - \en newline (hex 0A) - \er carriage return (hex 0D) - \et tab (hex 09) -@@ -42,16 +42,16 @@ - \eC one data unit, even in UTF mode (best avoided) - \ed a decimal digit - \eD a character that is not a decimal digit -- \eh a horizontal whitespace character -- \eH a character that is not a horizontal whitespace character -+ \eh a horizontal white space character -+ \eH a character that is not a horizontal white space character - \eN a character that is not a newline - \ep{\fIxx\fP} a character with the \fIxx\fP property - \eP{\fIxx\fP} a character without the \fIxx\fP property - \eR a newline sequence -- \es a whitespace character -- \eS a character that is not a whitespace character -- \ev a vertical whitespace character -- \eV a character that is not a vertical whitespace character -+ \es a white space character -+ \eS a character that is not a white space character -+ \ev a vertical white space character -+ \eV a character that is not a vertical white space character - \ew a "word" character - \eW a "non-word" character - \eX an extended Unicode sequence -@@ -245,7 +245,7 @@ - lower lower case letter - print printing, including space - punct printing, excluding alphanumeric -- space whitespace -+ space white space - upper upper case letter - word same as \ew - xdigit hexadecimal digit diff --git a/pcre-8.30-possesify_sr.patch b/pcre-8.30-possesify_sr.patch deleted file mode 100644 index 7c0d8dc..0000000 --- a/pcre-8.30-possesify_sr.patch +++ /dev/null @@ -1,138 +0,0 @@ -Possesify \s*\R - -This is back-ported of upstream commit for pcre-8.30: -r961 | ph10 | 2012-04-20 13:49:13 +0200 (Pá, 20 dub 2012) | 2 lines -Fix auto-possessify bugs for \s*\R and \S*R. - -Petr Pisar: Remove changelog entry - -Index: testdata/testinput2 -=================================================================== ---- testdata/testinput2 (revision 960) -+++ testdata/testinput2 (revision 961) -@@ -3102,7 +3102,25 @@ - /\d*\R/BZ - - /\s*\R/BZ -+ \x20\x0a -+ \x20\x0d -+ \x20\x0d\x0a - -+/\S*\R/BZ -+ a\x0a -+ -+/X\h*\R/BZ -+ X\x20\x0a -+ -+/X\H*\R/BZ -+ X\x0d\x0a -+ -+/X\H+\R/BZ -+ X\x0d\x0a -+ -+/X\H++\R/BZ -+ X\x0d\x0a -+ - /-- Perl treats this one differently, not failing the second string. I believe - that is a bug in Perl. --/ - -Index: testdata/testoutput2 -=================================================================== ---- testdata/testoutput2 (revision 960) -+++ testdata/testoutput2 (revision 961) -@@ -10755,12 +10755,77 @@ - /\s*\R/BZ - ------------------------------------------------------------------ - Bra -- \s*+ -+ \s* - \R - Ket - End - ------------------------------------------------------------------ -+ \x20\x0a -+ 0: \x0a -+ \x20\x0d -+ 0: \x0d -+ \x20\x0d\x0a -+ 0: \x0d\x0a - -+/\S*\R/BZ -+------------------------------------------------------------------ -+ Bra -+ \S*+ -+ \R -+ Ket -+ End -+------------------------------------------------------------------ -+ a\x0a -+ 0: a\x0a -+ -+/X\h*\R/BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \h*+ -+ \R -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x20\x0a -+ 0: X \x0a -+ -+/X\H*\R/BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \H* -+ \R -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x0d\x0a -+ 0: X\x0d\x0a -+ -+/X\H+\R/BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \H+ -+ \R -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x0d\x0a -+ 0: X\x0d\x0a -+ -+/X\H++\R/BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \H++ -+ \R -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x0d\x0a -+No match -+ - /-- Perl treats this one differently, not failing the second string. I believe - that is a bug in Perl. --/ - -Index: pcre_compile.c -=================================================================== ---- pcre_compile.c (revision 960) -+++ pcre_compile.c (revision 961) -@@ -3349,10 +3349,10 @@ - return next == -ESC_d; - - case OP_WHITESPACE: -- return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R; -+ return next == -ESC_S || next == -ESC_d || next == -ESC_w; - - case OP_NOT_WHITESPACE: -- return next == -ESC_s || next == -ESC_h || next == -ESC_v; -+ return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R; - - case OP_HSPACE: - return next == -ESC_S || next == -ESC_H || next == -ESC_d || diff --git a/pcre-8.30-possessify_high_ascii.patch b/pcre-8.30-possessify_high_ascii.patch deleted file mode 100644 index 96c7610..0000000 --- a/pcre-8.30-possessify_high_ascii.patch +++ /dev/null @@ -1,450 +0,0 @@ -Possessify high ASCII - -r962 | ph10 | 2012-04-20 19:28:23 +0200 (Pá, 20 dub 2012) | 3 lines -Fix auto-possessifying bugs when PCRE_UCP is not set, but character tables -specify characters in the range 127-255 are letters, spaces, etc. - -Petr Pisar: Changelog entries removed - -Index: testdata/testoutput15 -=================================================================== ---- testdata/testoutput15 (revision 961) -+++ testdata/testoutput15 (revision 962) -@@ -910,4 +910,140 @@ - First char = \x{c7} - Need char = \x{bf} - -+/\w+\x{C4}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \w++ -+ \x{c4} -+ Ket -+ End -+------------------------------------------------------------------ -+ a\x{C4}\x{C4} -+ 0: a\x{c4} -+ -+/\w+\x{C4}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \w+ -+ \x{c4} -+ Ket -+ End -+------------------------------------------------------------------ -+ a\x{C4}\x{C4} -+ 0: a\x{c4}\x{c4} -+ -+/\W+\x{C4}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \x{c4} -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{C4} -+ 0: !\x{c4} -+ -+/\W+\x{C4}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \W++ -+ \x{c4} -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{C4} -+ 0: !\x{c4} -+ -+/\W+\x{A1}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \x{a1} -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{A1} -+ 0: !\x{a1} -+ -+/\W+\x{A1}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \x{a1} -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{A1} -+ 0: !\x{a1} -+ -+/X\s+\x{A0}/8BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \s++ -+ \x{a0} -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x20\x{A0}\x{A0} -+ 0: X \x{a0} -+ -+/X\s+\x{A0}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ X -+ \s+ -+ \x{a0} -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x20\x{A0}\x{A0} -+ 0: X \x{a0}\x{a0} -+ -+/\S+\x{A0}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \S+ -+ \x{a0} -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x{A0}\x{A0} -+ 0: X\x{a0}\x{a0} -+ -+/\S+\x{A0}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \S++ -+ \x{a0} -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x{A0}\x{A0} -+ 0: X\x{a0} -+ -+/\x{a0}+\s!/8BZ -+------------------------------------------------------------------ -+ Bra -+ \x{a0}++ -+ \s -+ ! -+ Ket -+ End -+------------------------------------------------------------------ -+ \x{a0}\x20! -+ 0: \x{a0} ! -+ -+/\x{a0}+\s!/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \x{a0}+ -+ \s -+ ! -+ Ket -+ End -+------------------------------------------------------------------ -+ \x{a0}\x20! -+ 0: \x{a0} ! -+ - /-- End of testinput15 --/ -Index: testdata/testoutput18 -=================================================================== ---- testdata/testoutput18 (revision 961) -+++ testdata/testoutput18 (revision 962) -@@ -845,4 +845,140 @@ - /í¼€/8 - Failed: invalid UTF-16 string at offset 0 - -+/\w+\x{C4}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \w++ -+ \xc4 -+ Ket -+ End -+------------------------------------------------------------------ -+ a\x{C4}\x{C4} -+ 0: a\x{c4} -+ -+/\w+\x{C4}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \w+ -+ \xc4 -+ Ket -+ End -+------------------------------------------------------------------ -+ a\x{C4}\x{C4} -+ 0: a\x{c4}\x{c4} -+ -+/\W+\x{C4}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \xc4 -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{C4} -+ 0: !\x{c4} -+ -+/\W+\x{C4}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \W++ -+ \xc4 -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{C4} -+ 0: !\x{c4} -+ -+/\W+\x{A1}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \xa1 -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{A1} -+ 0: !\x{a1} -+ -+/\W+\x{A1}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \W+ -+ \xa1 -+ Ket -+ End -+------------------------------------------------------------------ -+ !\x{A1} -+ 0: !\x{a1} -+ -+/X\s+\x{A0}/8BZ -+------------------------------------------------------------------ -+ Bra -+ X -+ \s++ -+ \xa0 -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x20\x{A0}\x{A0} -+ 0: X \x{a0} -+ -+/X\s+\x{A0}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ X -+ \s+ -+ \xa0 -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x20\x{A0}\x{A0} -+ 0: X \x{a0}\x{a0} -+ -+/\S+\x{A0}/8BZ -+------------------------------------------------------------------ -+ Bra -+ \S+ -+ \xa0 -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x{A0}\x{A0} -+ 0: X\x{a0}\x{a0} -+ -+/\S+\x{A0}/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \S++ -+ \xa0 -+ Ket -+ End -+------------------------------------------------------------------ -+ X\x{A0}\x{A0} -+ 0: X\x{a0} -+ -+/\x{a0}+\s!/8BZ -+------------------------------------------------------------------ -+ Bra -+ \xa0++ -+ \s -+ ! -+ Ket -+ End -+------------------------------------------------------------------ -+ \x{a0}\x20! -+ 0: \x{a0} ! -+ -+/\x{a0}+\s!/8BZT1 -+------------------------------------------------------------------ -+ Bra -+ \xa0+ -+ \s -+ ! -+ Ket -+ End -+------------------------------------------------------------------ -+ \x{a0}\x20! -+ 0: \x{a0} ! -+ - /-- End of testinput18 --/ -Index: testdata/testinput15 -=================================================================== ---- testdata/testinput15 (revision 961) -+++ testdata/testinput15 (revision 962) -@@ -277,4 +277,40 @@ - - /\777/8DZ - -+/\w+\x{C4}/8BZ -+ a\x{C4}\x{C4} -+ -+/\w+\x{C4}/8BZT1 -+ a\x{C4}\x{C4} -+ -+/\W+\x{C4}/8BZ -+ !\x{C4} -+ -+/\W+\x{C4}/8BZT1 -+ !\x{C4} -+ -+/\W+\x{A1}/8BZ -+ !\x{A1} -+ -+/\W+\x{A1}/8BZT1 -+ !\x{A1} -+ -+/X\s+\x{A0}/8BZ -+ X\x20\x{A0}\x{A0} -+ -+/X\s+\x{A0}/8BZT1 -+ X\x20\x{A0}\x{A0} -+ -+/\S+\x{A0}/8BZ -+ X\x{A0}\x{A0} -+ -+/\S+\x{A0}/8BZT1 -+ X\x{A0}\x{A0} -+ -+/\x{a0}+\s!/8BZ -+ \x{a0}\x20! -+ -+/\x{a0}+\s!/8BZT1 -+ \x{a0}\x20! -+ - /-- End of testinput15 --/ -Index: testdata/testinput18 -=================================================================== ---- testdata/testinput18 (revision 961) -+++ testdata/testinput18 (revision 962) -@@ -240,4 +240,40 @@ - - /í¼€/8 - -+/\w+\x{C4}/8BZ -+ a\x{C4}\x{C4} -+ -+/\w+\x{C4}/8BZT1 -+ a\x{C4}\x{C4} -+ -+/\W+\x{C4}/8BZ -+ !\x{C4} -+ -+/\W+\x{C4}/8BZT1 -+ !\x{C4} -+ -+/\W+\x{A1}/8BZ -+ !\x{A1} -+ -+/\W+\x{A1}/8BZT1 -+ !\x{A1} -+ -+/X\s+\x{A0}/8BZ -+ X\x20\x{A0}\x{A0} -+ -+/X\s+\x{A0}/8BZT1 -+ X\x20\x{A0}\x{A0} -+ -+/\S+\x{A0}/8BZ -+ X\x{A0}\x{A0} -+ -+/\S+\x{A0}/8BZT1 -+ X\x{A0}\x{A0} -+ -+/\x{a0}+\s!/8BZ -+ \x{a0}\x20! -+ -+/\x{a0}+\s!/8BZT1 -+ \x{a0}\x20! -+ - /-- End of testinput18 --/ -Index: pcre_compile.c -=================================================================== ---- pcre_compile.c (revision 961) -+++ pcre_compile.c (revision 962) -@@ -3132,22 +3132,22 @@ - When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ - - case OP_DIGIT: -- return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; -+ return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; - - case OP_NOT_DIGIT: -- return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; -+ return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; - - case OP_WHITESPACE: -- return next > 127 || (cd->ctypes[next] & ctype_space) == 0; -+ return next > 255 || (cd->ctypes[next] & ctype_space) == 0; - - case OP_NOT_WHITESPACE: -- return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; -+ return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; - - case OP_WORDCHAR: -- return next > 127 || (cd->ctypes[next] & ctype_word) == 0; -+ return next > 255 || (cd->ctypes[next] & ctype_word) == 0; - - case OP_NOT_WORDCHAR: -- return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; -+ return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; - - case OP_HSPACE: - case OP_NOT_HSPACE: -@@ -3225,22 +3225,22 @@ - switch(-next) - { - case ESC_d: -- return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; -+ return c > 255 || (cd->ctypes[c] & ctype_digit) == 0; - - case ESC_D: -- return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; -+ return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; - - case ESC_s: -- return c > 127 || (cd->ctypes[c] & ctype_space) == 0; -+ return c > 255 || (cd->ctypes[c] & ctype_space) == 0; - - case ESC_S: -- return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; -+ return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; - - case ESC_w: -- return c > 127 || (cd->ctypes[c] & ctype_word) == 0; -+ return c > 255 || (cd->ctypes[c] & ctype_word) == 0; - - case ESC_W: -- return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; -+ return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; - - case ESC_h: - case ESC_H: diff --git a/pcre.spec b/pcre.spec index 72d61a0..210df18 100644 --- a/pcre.spec +++ b/pcre.spec @@ -1,8 +1,8 @@ # This is stable release: -#%%global rcversion RC1 +%global rcversion RC1 Name: pcre -Version: 8.30 -Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} +Version: 8.31 +Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -13,16 +13,6 @@ Source: ftp://ftp.csx.cam.ac.uk/pub/software/programming/%{name}/%{?rcversion:Te Patch0: pcre-8.21-multilib.patch # Refused by upstream, bug #675477 Patch1: pcre-8.30-refused_spelling_terminated.patch -# Bug #810314, fixed in upstream after 8.30 -Patch2: pcre-8.30-Fix-look-behind-assertion-in-UTF-8-JIT-mode.patch -# Bug #813237, fixed in upstream after 8.30 -Patch3: pcre-8.30-possesify_sr.patch -# bug #815217, fixed in upstream after 8.30 -Patch4: pcre-8.30-possessify_high_ascii.patch -# Bug #815214, fixed in upstream after 8.30 -Patch5: pcre-8.30-Fix-ovector-overflow.patch -# Bug #820978, fixed in upstream after 8.30 -Patch6: pcre-8.30-fix_spelling_formfeed_runtime_whitespace.patch BuildRequires: readline-devel # New libtool to get rid of rpath BuildRequires: autoconf, automake, libtool @@ -62,11 +52,6 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. # Get rid of rpath %patch0 -p1 -b .multilib %patch1 -p1 -b .terminated_typos -%patch2 -p1 -b .lookbehind_assertion -%patch3 -p0 -b .possesify_sr -%patch4 -p0 -b .possessify_high_ascii -%patch5 -p1 -b .ovector_overflow -%patch6 -p0 -b .spelling # Because of rpath patch libtoolize --copy --force && autoreconf # One contributor's name is non-UTF-8 @@ -130,6 +115,9 @@ make check %{_mandir}/man1/pcretest.* %changelog +* Tue Jun 05 2012 Petr Pisar - 8.31-0.1.RC1 +- 8.31-RC1 bump + * Sat May 12 2012 Tom Callaway - 8.30-7 - disable jit for sparcv9 and sparc64 diff --git a/sources b/sources index b3207ca..3961eb5 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -98e8928cccc945d04279581e778fbdff pcre-8.30.tar.bz2 +96282d115da389eb76f9fbc0daa68d1c pcre-8.31-RC1.tar.bz2