From 83bd5c540ac11b9879ca4dcf030f0ae051bbaabe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Mon, 23 Apr 2012 09:28:31 +0200 Subject: [PATCH] Possessify high ASCII --- pcre-8.30-possessify_high_ascii.patch | 450 ++++++++++++++++++++++++++ pcre.spec | 8 +- 2 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 pcre-8.30-possessify_high_ascii.patch diff --git a/pcre-8.30-possessify_high_ascii.patch b/pcre-8.30-possessify_high_ascii.patch new file mode 100644 index 0000000..96c7610 --- /dev/null +++ b/pcre-8.30-possessify_high_ascii.patch @@ -0,0 +1,450 @@ +Possessify high ASCII + +r962 | ph10 | 2012-04-20 19:28:23 +0200 (Pá, 20 dub 2012) | 3 lines +Fix auto-possessifying bugs when PCRE_UCP is not set, but character tables +specify characters in the range 127-255 are letters, spaces, etc. + +Petr Pisar: Changelog entries removed + +Index: testdata/testoutput15 +=================================================================== +--- testdata/testoutput15 (revision 961) ++++ testdata/testoutput15 (revision 962) +@@ -910,4 +910,140 @@ + First char = \x{c7} + Need char = \x{bf} + ++/\w+\x{C4}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \w++ ++ \x{c4} ++ Ket ++ End ++------------------------------------------------------------------ ++ a\x{C4}\x{C4} ++ 0: a\x{c4} ++ ++/\w+\x{C4}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \w+ ++ \x{c4} ++ Ket ++ End ++------------------------------------------------------------------ ++ a\x{C4}\x{C4} ++ 0: a\x{c4}\x{c4} ++ ++/\W+\x{C4}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \x{c4} ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{C4} ++ 0: !\x{c4} ++ ++/\W+\x{C4}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \W++ ++ \x{c4} ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{C4} ++ 0: !\x{c4} ++ ++/\W+\x{A1}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \x{a1} ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{A1} ++ 0: !\x{a1} ++ ++/\W+\x{A1}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \x{a1} ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{A1} ++ 0: !\x{a1} ++ ++/X\s+\x{A0}/8BZ ++------------------------------------------------------------------ ++ Bra ++ X ++ \s++ ++ \x{a0} ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x20\x{A0}\x{A0} ++ 0: X \x{a0} ++ ++/X\s+\x{A0}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ X ++ \s+ ++ \x{a0} ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x20\x{A0}\x{A0} ++ 0: X \x{a0}\x{a0} ++ ++/\S+\x{A0}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \S+ ++ \x{a0} ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x{A0}\x{A0} ++ 0: X\x{a0}\x{a0} ++ ++/\S+\x{A0}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \S++ ++ \x{a0} ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x{A0}\x{A0} ++ 0: X\x{a0} ++ ++/\x{a0}+\s!/8BZ ++------------------------------------------------------------------ ++ Bra ++ \x{a0}++ ++ \s ++ ! ++ Ket ++ End ++------------------------------------------------------------------ ++ \x{a0}\x20! ++ 0: \x{a0} ! ++ ++/\x{a0}+\s!/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \x{a0}+ ++ \s ++ ! ++ Ket ++ End ++------------------------------------------------------------------ ++ \x{a0}\x20! ++ 0: \x{a0} ! ++ + /-- End of testinput15 --/ +Index: testdata/testoutput18 +=================================================================== +--- testdata/testoutput18 (revision 961) ++++ testdata/testoutput18 (revision 962) +@@ -845,4 +845,140 @@ + /í¼€/8 + Failed: invalid UTF-16 string at offset 0 + ++/\w+\x{C4}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \w++ ++ \xc4 ++ Ket ++ End ++------------------------------------------------------------------ ++ a\x{C4}\x{C4} ++ 0: a\x{c4} ++ ++/\w+\x{C4}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \w+ ++ \xc4 ++ Ket ++ End ++------------------------------------------------------------------ ++ a\x{C4}\x{C4} ++ 0: a\x{c4}\x{c4} ++ ++/\W+\x{C4}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \xc4 ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{C4} ++ 0: !\x{c4} ++ ++/\W+\x{C4}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \W++ ++ \xc4 ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{C4} ++ 0: !\x{c4} ++ ++/\W+\x{A1}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \xa1 ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{A1} ++ 0: !\x{a1} ++ ++/\W+\x{A1}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \W+ ++ \xa1 ++ Ket ++ End ++------------------------------------------------------------------ ++ !\x{A1} ++ 0: !\x{a1} ++ ++/X\s+\x{A0}/8BZ ++------------------------------------------------------------------ ++ Bra ++ X ++ \s++ ++ \xa0 ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x20\x{A0}\x{A0} ++ 0: X \x{a0} ++ ++/X\s+\x{A0}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ X ++ \s+ ++ \xa0 ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x20\x{A0}\x{A0} ++ 0: X \x{a0}\x{a0} ++ ++/\S+\x{A0}/8BZ ++------------------------------------------------------------------ ++ Bra ++ \S+ ++ \xa0 ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x{A0}\x{A0} ++ 0: X\x{a0}\x{a0} ++ ++/\S+\x{A0}/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \S++ ++ \xa0 ++ Ket ++ End ++------------------------------------------------------------------ ++ X\x{A0}\x{A0} ++ 0: X\x{a0} ++ ++/\x{a0}+\s!/8BZ ++------------------------------------------------------------------ ++ Bra ++ \xa0++ ++ \s ++ ! ++ Ket ++ End ++------------------------------------------------------------------ ++ \x{a0}\x20! ++ 0: \x{a0} ! ++ ++/\x{a0}+\s!/8BZT1 ++------------------------------------------------------------------ ++ Bra ++ \xa0+ ++ \s ++ ! ++ Ket ++ End ++------------------------------------------------------------------ ++ \x{a0}\x20! ++ 0: \x{a0} ! ++ + /-- End of testinput18 --/ +Index: testdata/testinput15 +=================================================================== +--- testdata/testinput15 (revision 961) ++++ testdata/testinput15 (revision 962) +@@ -277,4 +277,40 @@ + + /\777/8DZ + ++/\w+\x{C4}/8BZ ++ a\x{C4}\x{C4} ++ ++/\w+\x{C4}/8BZT1 ++ a\x{C4}\x{C4} ++ ++/\W+\x{C4}/8BZ ++ !\x{C4} ++ ++/\W+\x{C4}/8BZT1 ++ !\x{C4} ++ ++/\W+\x{A1}/8BZ ++ !\x{A1} ++ ++/\W+\x{A1}/8BZT1 ++ !\x{A1} ++ ++/X\s+\x{A0}/8BZ ++ X\x20\x{A0}\x{A0} ++ ++/X\s+\x{A0}/8BZT1 ++ X\x20\x{A0}\x{A0} ++ ++/\S+\x{A0}/8BZ ++ X\x{A0}\x{A0} ++ ++/\S+\x{A0}/8BZT1 ++ X\x{A0}\x{A0} ++ ++/\x{a0}+\s!/8BZ ++ \x{a0}\x20! ++ ++/\x{a0}+\s!/8BZT1 ++ \x{a0}\x20! ++ + /-- End of testinput15 --/ +Index: testdata/testinput18 +=================================================================== +--- testdata/testinput18 (revision 961) ++++ testdata/testinput18 (revision 962) +@@ -240,4 +240,40 @@ + + /í¼€/8 + ++/\w+\x{C4}/8BZ ++ a\x{C4}\x{C4} ++ ++/\w+\x{C4}/8BZT1 ++ a\x{C4}\x{C4} ++ ++/\W+\x{C4}/8BZ ++ !\x{C4} ++ ++/\W+\x{C4}/8BZT1 ++ !\x{C4} ++ ++/\W+\x{A1}/8BZ ++ !\x{A1} ++ ++/\W+\x{A1}/8BZT1 ++ !\x{A1} ++ ++/X\s+\x{A0}/8BZ ++ X\x20\x{A0}\x{A0} ++ ++/X\s+\x{A0}/8BZT1 ++ X\x20\x{A0}\x{A0} ++ ++/\S+\x{A0}/8BZ ++ X\x{A0}\x{A0} ++ ++/\S+\x{A0}/8BZT1 ++ X\x{A0}\x{A0} ++ ++/\x{a0}+\s!/8BZ ++ \x{a0}\x20! ++ ++/\x{a0}+\s!/8BZT1 ++ \x{a0}\x20! ++ + /-- End of testinput18 --/ +Index: pcre_compile.c +=================================================================== +--- pcre_compile.c (revision 961) ++++ pcre_compile.c (revision 962) +@@ -3132,22 +3132,22 @@ + When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ + + case OP_DIGIT: +- return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; ++ return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; + + case OP_NOT_DIGIT: +- return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; ++ return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; + + case OP_WHITESPACE: +- return next > 127 || (cd->ctypes[next] & ctype_space) == 0; ++ return next > 255 || (cd->ctypes[next] & ctype_space) == 0; + + case OP_NOT_WHITESPACE: +- return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; ++ return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; + + case OP_WORDCHAR: +- return next > 127 || (cd->ctypes[next] & ctype_word) == 0; ++ return next > 255 || (cd->ctypes[next] & ctype_word) == 0; + + case OP_NOT_WORDCHAR: +- return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; ++ return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; + + case OP_HSPACE: + case OP_NOT_HSPACE: +@@ -3225,22 +3225,22 @@ + switch(-next) + { + case ESC_d: +- return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; ++ return c > 255 || (cd->ctypes[c] & ctype_digit) == 0; + + case ESC_D: +- return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; ++ return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; + + case ESC_s: +- return c > 127 || (cd->ctypes[c] & ctype_space) == 0; ++ return c > 255 || (cd->ctypes[c] & ctype_space) == 0; + + case ESC_S: +- return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; ++ return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; + + case ESC_w: +- return c > 127 || (cd->ctypes[c] & ctype_word) == 0; ++ return c > 255 || (cd->ctypes[c] & ctype_word) == 0; + + case ESC_W: +- return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; ++ return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; + + case ESC_h: + case ESC_H: diff --git a/pcre.spec b/pcre.spec index e26bbee..4ddc9b3 100644 --- a/pcre.spec +++ b/pcre.spec @@ -2,7 +2,7 @@ #%%global rcversion RC1 Name: pcre Version: 8.30 -Release: %{?rcversion:0.}4%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}5%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -17,6 +17,8 @@ Patch1: pcre-8.30-refused_spelling_terminated.patch Patch2: pcre-8.30-Fix-look-behind-assertion-in-UTF-8-JIT-mode.patch # Bug #813237, fixed in upstream after 8.30 Patch3: pcre-8.30-possesify_sr.patch +# bug #815217, fixed in upstream after 8.30 +Patch4: pcre-8.30-possessify_high_ascii.patch BuildRequires: readline-devel # New libtool to get rid of rpath BuildRequires: autoconf, automake, libtool @@ -58,6 +60,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest. %patch1 -p1 -b .terminated_typos %patch2 -p1 -b .lookbehind_assertion %patch3 -p0 -b .possesify_sr +%patch4 -p0 -b .possessify_high_ascii # Because of rpath patch libtoolize --copy --force && autoreconf # One contributor's name is non-UTF-8 @@ -121,6 +124,9 @@ make check %{_mandir}/man1/pcretest.* %changelog +* Mon Apr 23 2012 Petr Pisar - 8.30-5 +- Possessify high ASCII (bug #815217) + * Fri Apr 20 2012 Petr Pisar - 8.30-4 - Possesify \s*\R (bug #813237)