Fix matching repeated character classes against an 8-bit string containing multi-code-unit characters
This commit is contained in:
parent
b92c5c8b8e
commit
25ca86c1eb
@ -0,0 +1,120 @@
|
|||||||
|
From ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c Mon Sep 17 00:00:00 2001
|
||||||
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||||
|
Date: Mon, 19 Feb 2018 17:26:33 +0000
|
||||||
|
Subject: [PATCH] Fix \C bug with repeated character classes in UTF-8 mode.
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069
|
||||||
|
Petr Písař: Ported to 10.31.
|
||||||
|
---
|
||||||
|
src/pcre2_match.c | 16 ++++++++++++----
|
||||||
|
testdata/testinput22 | 3 +++
|
||||||
|
testdata/testoutput22-16 | 4 ++++
|
||||||
|
testdata/testoutput22-32 | 4 ++++
|
||||||
|
testdata/testoutput22-8 | 4 ++++
|
||||||
|
|
||||||
|
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||||
|
index 79cc93f..ce96016 100644
|
||||||
|
--- a/src/pcre2_match.c
|
||||||
|
+++ b/src/pcre2_match.c
|
||||||
|
@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
|
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
|
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
|
+ go too far. */
|
||||||
|
+
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
RMATCH(Fecode, RM201);
|
||||||
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
|
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||||
|
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||||
|
BACKCHAR(Feptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
|
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
|
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
|
+ go too far. */
|
||||||
|
+
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
RMATCH(Fecode, RM101);
|
||||||
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
|
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
|
||||||
|
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf) BACKCHAR(Feptr);
|
||||||
|
#endif
|
||||||
|
@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
if (reptype == REPTYPE_POS) continue; /* No backtracking */
|
||||||
|
|
||||||
|
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
|
||||||
|
- Unicode character. Use <= pp to ensure backtracking doesn't go too far.
|
||||||
|
- */
|
||||||
|
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
|
||||||
|
+ go too far. */
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
diff --git a/testdata/testinput22 b/testdata/testinput22
|
||||||
|
index e6d4053..c218ea6 100644
|
||||||
|
--- a/testdata/testinput22
|
||||||
|
+++ b/testdata/testinput22
|
||||||
|
@@ -98,4 +98,7 @@
|
||||||
|
\= Expect no match - tests \C at end of subject
|
||||||
|
ab
|
||||||
|
|
||||||
|
+/\C[^\v]+\x80/utf
|
||||||
|
+ [AΏBŀC]
|
||||||
|
+
|
||||||
|
# End of testinput22
|
||||||
|
diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
|
||||||
|
index 88f827c..5e23611 100644
|
||||||
|
--- a/testdata/testoutput22-16
|
||||||
|
+++ b/testdata/testoutput22-16
|
||||||
|
@@ -171,4 +171,8 @@ No match
|
||||||
|
ab
|
||||||
|
No match
|
||||||
|
|
||||||
|
+/\C[^\v]+\x80/utf
|
||||||
|
+ [AΏBŀC]
|
||||||
|
+No match
|
||||||
|
+
|
||||||
|
# End of testinput22
|
||||||
|
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
|
||||||
|
index ac485fc..8576f31 100644
|
||||||
|
--- a/testdata/testoutput22-32
|
||||||
|
+++ b/testdata/testoutput22-32
|
||||||
|
@@ -169,4 +169,8 @@ No match
|
||||||
|
ab
|
||||||
|
No match
|
||||||
|
|
||||||
|
+/\C[^\v]+\x80/utf
|
||||||
|
+ [AΏBŀC]
|
||||||
|
+No match
|
||||||
|
+
|
||||||
|
# End of testinput22
|
||||||
|
diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
|
||||||
|
index 3d31fbc..8543652 100644
|
||||||
|
--- a/testdata/testoutput22-8
|
||||||
|
+++ b/testdata/testoutput22-8
|
||||||
|
@@ -173,4 +173,8 @@ No match
|
||||||
|
ab
|
||||||
|
No match
|
||||||
|
|
||||||
|
+/\C[^\v]+\x80/utf
|
||||||
|
+ [AΏBŀC]
|
||||||
|
+No match
|
||||||
|
+
|
||||||
|
# End of testinput22
|
||||||
|
--
|
||||||
|
2.13.6
|
||||||
|
|
@ -57,6 +57,9 @@ Patch2: pcre2-10.31-Fix-pcre2test-C-to-correctly-show-what-R-matches.patch
|
|||||||
# 2/2 Fix pcre2test -C to correctly show what \R matches,
|
# 2/2 Fix pcre2test -C to correctly show what \R matches,
|
||||||
# in upstream after 10.31
|
# in upstream after 10.31
|
||||||
Patch3: pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch
|
Patch3: pcre2-10.31-Oops-forgot-about-C-bsr-in-previous-patch.patch
|
||||||
|
# Fix matching repeated character classes against an 8-bit string containing
|
||||||
|
# multi-code-unit characters, in upstream after 10.31
|
||||||
|
Patch4: pcre2-10.31-Fix-C-bug-with-repeated-character-classes-in-UTF-8-m.patch
|
||||||
BuildRequires: autoconf
|
BuildRequires: autoconf
|
||||||
BuildRequires: automake
|
BuildRequires: automake
|
||||||
BuildRequires: coreutils
|
BuildRequires: coreutils
|
||||||
@ -136,6 +139,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
|
|||||||
%patch1 -p1
|
%patch1 -p1
|
||||||
%patch2 -p1
|
%patch2 -p1
|
||||||
%patch3 -p1
|
%patch3 -p1
|
||||||
|
%patch4 -p1
|
||||||
# Because of multilib patch
|
# Because of multilib patch
|
||||||
libtoolize --copy --force
|
libtoolize --copy --force
|
||||||
autoreconf -vif
|
autoreconf -vif
|
||||||
@ -242,6 +246,8 @@ make %{?_smp_mflags} check VERBOSE=yes
|
|||||||
- Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero
|
- Fix returning unset groups in POSIX interface if REG_STARTEND has a non-zero
|
||||||
starting offset (upstream bug #2244)
|
starting offset (upstream bug #2244)
|
||||||
- Fix pcre2test -C to correctly show what \R matches
|
- Fix pcre2test -C to correctly show what \R matches
|
||||||
|
- Fix matching repeated character classes against an 8-bit string containing
|
||||||
|
multi-code-unit characters
|
||||||
|
|
||||||
* Mon Feb 12 2018 Petr Pisar <ppisar@redhat.com> - 10.31-1
|
* Mon Feb 12 2018 Petr Pisar <ppisar@redhat.com> - 10.31-1
|
||||||
- 10.31 bump
|
- 10.31 bump
|
||||||
|
Loading…
Reference in New Issue
Block a user