Fix case-less match if cases differ in encoding length
This commit is contained in:
parent
eadca49929
commit
789dda6d1e
150
pcre-8.20-caseless_different_length.patch
Normal file
150
pcre-8.20-caseless_different_length.patch
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
From 72a4bb52e09d46af0b00dd4064f93e9948fdad51 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
|
||||||
|
Date: Fri, 2 Dec 2011 11:36:54 +0100
|
||||||
|
Subject: [PATCH] Fix caseless match if cases differ in encoding length
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
From:
|
||||||
|
r778 | ph10 | 2011-12-01 18:38:47 +0100 (Čt, 01 pro 2011) | 3 lines
|
||||||
|
|
||||||
|
Fix bug with caseless matching of characters of different lengths when
|
||||||
|
the shorter is right at the end of the subject.
|
||||||
|
|
||||||
|
Petr Pisar: Changelog entry removed.
|
||||||
|
---
|
||||||
|
pcre_exec.c | 32 ++++++++++++++++----------------
|
||||||
|
testdata/testinput6 | 14 ++++++++++++++
|
||||||
|
testdata/testoutput6 | 22 ++++++++++++++++++++++
|
||||||
|
3 files changed, 52 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/pcre_exec.c b/pcre_exec.c
|
||||||
|
index 2e763d1..9881bdd 100644
|
||||||
|
--- a/pcre_exec.c
|
||||||
|
+++ b/pcre_exec.c
|
||||||
|
@@ -427,7 +427,7 @@ returns a negative (error) response, the outer incarnation must also return the
|
||||||
|
same response. */
|
||||||
|
|
||||||
|
/* These macros pack up tests that are used for partial matching, and which
|
||||||
|
-appears several times in the code. We set the "hit end" flag if the pointer is
|
||||||
|
+appear several times in the code. We set the "hit end" flag if the pointer is
|
||||||
|
at the end of the subject and also past the start of the subject (i.e.
|
||||||
|
something has been matched). For hard partial matching, we then return
|
||||||
|
immediately. The second one is used when we already know we are past the end of
|
||||||
|
@@ -3039,31 +3039,36 @@ for (;;)
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
- /* Match a single character, caselessly */
|
||||||
|
+ /* Match a single character, caselessly. If we are at the end of the
|
||||||
|
+ subject, give up immediately. */
|
||||||
|
|
||||||
|
case OP_CHARI:
|
||||||
|
+ if (eptr >= md->end_subject)
|
||||||
|
+ {
|
||||||
|
+ SCHECK_PARTIAL();
|
||||||
|
+ MRRETURN(MATCH_NOMATCH);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
#ifdef SUPPORT_UTF8
|
||||||
|
if (utf8)
|
||||||
|
{
|
||||||
|
length = 1;
|
||||||
|
ecode++;
|
||||||
|
GETCHARLEN(fc, ecode, length);
|
||||||
|
-
|
||||||
|
- if (length > md->end_subject - eptr)
|
||||||
|
- {
|
||||||
|
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
|
||||||
|
- MRRETURN(MATCH_NOMATCH);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
+
|
||||||
|
/* If the pattern character's value is < 128, we have only one byte, and
|
||||||
|
- can use the fast lookup table. */
|
||||||
|
+ we know that its other case must also be one byte long, so we can use the
|
||||||
|
+ fast lookup table. We know that there is at least one byte left in the
|
||||||
|
+ subject. */
|
||||||
|
|
||||||
|
if (fc < 128)
|
||||||
|
{
|
||||||
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* Otherwise we must pick up the subject character */
|
||||||
|
+ /* Otherwise we must pick up the subject character. Note that we cannot
|
||||||
|
+ use the value of "length" to check for sufficient bytes left, because the
|
||||||
|
+ other case of the character may have more or fewer bytes. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -3088,11 +3093,6 @@ for (;;)
|
||||||
|
|
||||||
|
/* Non-UTF-8 mode */
|
||||||
|
{
|
||||||
|
- if (md->end_subject - eptr < 1)
|
||||||
|
- {
|
||||||
|
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
|
||||||
|
- MRRETURN(MATCH_NOMATCH);
|
||||||
|
- }
|
||||||
|
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
|
||||||
|
ecode += 2;
|
||||||
|
}
|
||||||
|
diff --git a/testdata/testinput6 b/testdata/testinput6
|
||||||
|
index e5fc0e9..6b0d2f7 100644
|
||||||
|
--- a/testdata/testinput6
|
||||||
|
+++ b/testdata/testinput6
|
||||||
|
@@ -802,4 +802,18 @@
|
||||||
|
** Failers
|
||||||
|
a\xFCb
|
||||||
|
|
||||||
|
+/ⱥ/8i
|
||||||
|
+ ⱥ
|
||||||
|
+ Ⱥx
|
||||||
|
+ Ⱥ
|
||||||
|
+
|
||||||
|
+/[ⱥ]/8i
|
||||||
|
+ ⱥ
|
||||||
|
+ Ⱥx
|
||||||
|
+ Ⱥ
|
||||||
|
+
|
||||||
|
+/Ⱥ/8i
|
||||||
|
+ Ⱥ
|
||||||
|
+ ⱥ
|
||||||
|
+
|
||||||
|
/-- End of testinput6 --/
|
||||||
|
diff --git a/testdata/testoutput6 b/testdata/testoutput6
|
||||||
|
index 1acaa23..68c0a46 100644
|
||||||
|
--- a/testdata/testoutput6
|
||||||
|
+++ b/testdata/testoutput6
|
||||||
|
@@ -1353,4 +1353,26 @@ No match
|
||||||
|
a\xFCb
|
||||||
|
No match
|
||||||
|
|
||||||
|
+/ⱥ/8i
|
||||||
|
+ ⱥ
|
||||||
|
+ 0: \x{2c65}
|
||||||
|
+ Ⱥx
|
||||||
|
+ 0: \x{23a}
|
||||||
|
+ Ⱥ
|
||||||
|
+ 0: \x{23a}
|
||||||
|
+
|
||||||
|
+/[ⱥ]/8i
|
||||||
|
+ ⱥ
|
||||||
|
+ 0: \x{2c65}
|
||||||
|
+ Ⱥx
|
||||||
|
+ 0: \x{23a}
|
||||||
|
+ Ⱥ
|
||||||
|
+ 0: \x{23a}
|
||||||
|
+
|
||||||
|
+/Ⱥ/8i
|
||||||
|
+ Ⱥ
|
||||||
|
+ 0: \x{23a}
|
||||||
|
+ ⱥ
|
||||||
|
+ 0: \x{2c65}
|
||||||
|
+
|
||||||
|
/-- End of testinput6 --/
|
||||||
|
--
|
||||||
|
1.7.7.4
|
||||||
|
|
@@ -1,7 +1,7 @@
|
|||||||
# This is stable release: %%global rcversion RC3
|
# This is stable release: %%global rcversion RC3
|
||||||
Name: pcre
|
Name: pcre
|
||||||
Version: 8.20
|
Version: 8.20
|
||||||
Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist}
|
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist}
|
||||||
%global myversion %{version}%{?rcversion:-%rcversion}
|
%global myversion %{version}%{?rcversion:-%rcversion}
|
||||||
Summary: Perl-compatible regular expression library
|
Summary: Perl-compatible regular expression library
|
||||||
Group: System Environment/Libraries
|
Group: System Environment/Libraries
|
||||||
@@ -19,6 +19,9 @@ Patch3: pcre-8.20-lookbehind-2.patch
|
|||||||
Patch4: pcre-8.20-forward_reference.patch
|
Patch4: pcre-8.20-forward_reference.patch
|
||||||
# Fix cache-flush in JIT on PPC, in upstream after 8.20.
|
# Fix cache-flush in JIT on PPC, in upstream after 8.20.
|
||||||
Patch5: pcre-8.20-ppcjit.patch
|
Patch5: pcre-8.20-ppcjit.patch
|
||||||
|
# Fix case-less match if cases differ in encoding length, in upstream after
|
||||||
|
# 8.20.
|
||||||
|
Patch6: pcre-8.20-caseless_different_length.patch
|
||||||
BuildRequires: readline-devel
|
BuildRequires: readline-devel
|
||||||
# New libtool to get rid of rpath
|
# New libtool to get rid of rpath
|
||||||
BuildRequires: autoconf, automake, libtool
|
BuildRequires: autoconf, automake, libtool
|
||||||
@@ -63,6 +66,7 @@ libtoolize --copy --force && autoreconf
|
|||||||
%patch3 -p1 -b .lookbehind2
|
%patch3 -p1 -b .lookbehind2
|
||||||
%patch4 -p0 -b .forward_reference
|
%patch4 -p0 -b .forward_reference
|
||||||
%patch5 -p0 -b .ppcjit
|
%patch5 -p0 -b .ppcjit
|
||||||
|
%patch6 -p1 -b .caseless_different_length
|
||||||
# One contributor's name is non-UTF-8
|
# One contributor's name is non-UTF-8
|
||||||
for F in ChangeLog; do
|
for F in ChangeLog; do
|
||||||
iconv -f latin1 -t utf8 "$F" >"${F}.utf8"
|
iconv -f latin1 -t utf8 "$F" >"${F}.utf8"
|
||||||
@@ -133,6 +137,9 @@ make check
|
|||||||
%{_mandir}/man1/pcretest.*
|
%{_mandir}/man1/pcretest.*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Dec 02 2011 Petr Pisar <ppisar@redhat.com> - 8.20-7
|
||||||
|
- Fix case-less match if cases differ in encoding length (bug #756675)
|
||||||
|
|
||||||
* Fri Nov 25 2011 Petr Pisar <ppisar@redhat.com> - 8.20-6
|
* Fri Nov 25 2011 Petr Pisar <ppisar@redhat.com> - 8.20-6
|
||||||
- Fix cache-flush in JIT on PPC
|
- Fix cache-flush in JIT on PPC
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user