From 789dda6d1e8ff827e91bb7c614419573eae3620c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Fri, 2 Dec 2011 11:47:19 +0100 Subject: [PATCH] Fix case-less match if cases differ in encoding length --- pcre-8.20-caseless_different_length.patch | 150 ++++++++++++++++++++++ pcre.spec | 9 +- 2 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 pcre-8.20-caseless_different_length.patch diff --git a/pcre-8.20-caseless_different_length.patch b/pcre-8.20-caseless_different_length.patch new file mode 100644 index 0000000..88843e5 --- /dev/null +++ b/pcre-8.20-caseless_different_length.patch @@ -0,0 +1,150 @@ +From 72a4bb52e09d46af0b00dd4064f93e9948fdad51 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= +Date: Fri, 2 Dec 2011 11:36:54 +0100 +Subject: [PATCH] Fix caseless match if cases differ in encoding length +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: +r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines + +Fix bug with caseless matching of characters of different lengths when +the shorter is right at the end of the subject. + +Petr Pisar: Changelog entry removed. +--- + pcre_exec.c | 32 ++++++++++++++++---------------- + testdata/testinput6 | 14 ++++++++++++++ + testdata/testoutput6 | 22 ++++++++++++++++++++++ + 3 files changed, 52 insertions(+), 16 deletions(-) + +diff --git a/pcre_exec.c b/pcre_exec.c +index 2e763d1..9881bdd 100644 +--- a/pcre_exec.c ++++ b/pcre_exec.c +@@ -427,7 +427,7 @@ returns a negative (error) response, the outer incarnation must also return the + same response. */ + + /* These macros pack up tests that are used for partial matching, and which +-appears several times in the code. We set the "hit end" flag if the pointer is ++appear several times in the code. We set the "hit end" flag if the pointer is + at the end of the subject and also past the start of the subject (i.e. + something has been matched). For hard partial matching, we then return + immediately. The second one is used when we already know we are past the end of +@@ -3039,31 +3039,36 @@ for (;;) + } + break; + +- /* Match a single character, caselessly */ ++ /* Match a single character, caselessly. If we are at the end of the ++ subject, give up immediately. */ + + case OP_CHARI: ++ if (eptr >= md->end_subject) ++ { ++ SCHECK_PARTIAL(); ++ MRRETURN(MATCH_NOMATCH); ++ } ++ + #ifdef SUPPORT_UTF8 + if (utf8) + { + length = 1; + ecode++; + GETCHARLEN(fc, ecode, length); +- +- if (length > md->end_subject - eptr) +- { +- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */ +- MRRETURN(MATCH_NOMATCH); +- } +- ++ + /* If the pattern character's value is < 128, we have only one byte, and +- can use the fast lookup table. */ ++ we know that its other case must also be one byte long, so we can use the ++ fast lookup table. We know that there is at least one byte left in the ++ subject. */ + + if (fc < 128) + { + if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); + } + +- /* Otherwise we must pick up the subject character */ ++ /* Otherwise we must pick up the subject character. Note that we cannot ++ use the value of "length" to check for sufficient bytes left, because the ++ other case of the character may have more or fewer bytes. */ + + else + { +@@ -3088,11 +3093,6 @@ for (;;) + + /* Non-UTF-8 mode */ + { +- if (md->end_subject - eptr < 1) +- { +- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */ +- MRRETURN(MATCH_NOMATCH); +- } + if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH); + ecode += 2; + } +diff --git a/testdata/testinput6 b/testdata/testinput6 +index e5fc0e9..6b0d2f7 100644 +--- a/testdata/testinput6 ++++ b/testdata/testinput6 +@@ -802,4 +802,18 @@ + ** Failers + a\xFCb + ++/ⱥ/8i ++ ⱥ ++ Ⱥx ++ Ⱥ ++ ++/[ⱥ]/8i ++ ⱥ ++ Ⱥx ++ Ⱥ ++ ++/Ⱥ/8i ++ Ⱥ ++ ⱥ ++ + /-- End of testinput6 --/ +diff --git a/testdata/testoutput6 b/testdata/testoutput6 +index 1acaa23..68c0a46 100644 +--- a/testdata/testoutput6 ++++ b/testdata/testoutput6 +@@ -1353,4 +1353,26 @@ No match + a\xFCb + No match + ++/ⱥ/8i ++ ⱥ ++ 0: \x{2c65} ++ Ⱥx ++ 0: \x{23a} ++ Ⱥ ++ 0: \x{23a} ++ ++/[ⱥ]/8i ++ ⱥ ++ 0: \x{2c65} ++ Ⱥx ++ 0: \x{23a} ++ Ⱥ ++ 0: \x{23a} ++ ++/Ⱥ/8i ++ Ⱥ ++ 0: \x{23a} ++ ⱥ ++ 0: \x{2c65} ++ + /-- End of testinput6 --/ +-- +1.7.7.4 + diff --git a/pcre.spec b/pcre.spec index 0bc3fb4..280f0cf 100644 --- a/pcre.spec +++ b/pcre.spec @@ -1,7 +1,7 @@ # This is stable release: %%global rcversion RC3 Name: pcre Version: 8.20 -Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library Group: System Environment/Libraries @@ -19,6 +19,9 @@ Patch3: pcre-8.20-lookbehind-2.patch Patch4: pcre-8.20-forward_reference.patch # Fix cache-flush in JIT on PPC, in upstream after 8.20. Patch5: pcre-8.20-ppcjit.patch +# Fix case-less match if cases differ in encoding length, in upstream after +# 8.20. +Patch6: pcre-8.20-caseless_different_length.patch BuildRequires: readline-devel # New libtool to get rid of rpath BuildRequires: autoconf, automake, libtool @@ -63,6 +66,7 @@ libtoolize --copy --force && autoreconf %patch3 -p1 -b .lookbehind2 %patch4 -p0 -b .forward_reference %patch5 -p0 -b .ppcjit +%patch6 -p1 -b .caseless_different_length # One contributor's name is non-UTF-8 for F in ChangeLog; do iconv -f latin1 -t utf8 "$F" >"${F}.utf8" @@ -133,6 +137,9 @@ make check %{_mandir}/man1/pcretest.* %changelog +* Fri Dec 02 2011 Petr Pisar - 8.20-7 +- Fix case-less match if cases differ in encoding length (bug #756675) + * Fri Nov 25 2011 Petr Pisar - 8.20-6 - Fix cache-flush in JIT on PPC