Fix backtracking for \C\X* in UTF-8 mode

This commit is contained in:
Petr Písař 2015-04-10 08:11:38 +02:00
parent 0583be6269
commit de20f156dc
2 changed files with 151 additions and 0 deletions

View File

@ -0,0 +1,146 @@
From 9d8c223a96fde36fc21307abad702ed747135809 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Fri, 10 Apr 2015 08:17:18 +0200
Subject: [PATCH] Fix backtracking bug for \C\X* in UTF mode.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Upstream commits ported to 8.36:
commit 4a81b0ca19da65ea9a50c208017a74e55c3fd027
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed Apr 8 16:34:24 2015 +0000
Fix backtracking bug for \C\X* in UTF mode.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1545 2f5784b3-3f2a-0410-8824-cb99058d5e15
commit 6719c2cdeb7670d4bf10f15a8511ca15af7ea595
Author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Wed Apr 8 16:56:28 2015 +0000
Fix other cases of backtracking crashes after \C in UTF mode.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1546 2f5784b3-3f2a-0410-8824-cb99058d5e15
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pcre_exec.c | 19 ++++++++++++-------
testdata/testinput4 | 6 ++++++
testdata/testoutput4 | 8 ++++++++
3 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/pcre_exec.c b/pcre_exec.c
index bb5620d..6c9f4d7 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -3490,7 +3490,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
#ifdef SUPPORT_UCP
@@ -3911,7 +3911,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -4046,7 +4046,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -5617,7 +5617,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -5659,12 +5659,17 @@ for (;;)
if (possessive) continue; /* No backtracking */
+ /* We use <= pp rather than == pp to detect the start of the run while
+ backtracking because the use of \C in UTF mode can cause BACKCHAR to
+ move back past pp. This is just palliative; the use of \C in UTF mode
+ is fraught with danger. */
+
for(;;)
{
int lgb, rgb;
PCRE_PUCHAR fptr;
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -5682,7 +5687,7 @@ for (;;)
for (;;)
{
- if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
+ if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
fptr = eptr - 1;
if (!utf) c = *fptr; else
{
@@ -5932,7 +5937,7 @@ for (;;)
if (possessive) continue; /* No backtracking */
for(;;)
{
- if (eptr == pp) goto TAIL_RECURSE;
+ if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
diff --git a/testdata/testinput4 b/testdata/testinput4
index 04fca1a..f139c62 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -724,4 +724,10 @@
"[\S\V\H]"8
+/\C\X*QT/8
+ Ӆ\x0aT
+
+/\C(\W?ſ)'?{{/8
+ \\C(\\W?ſ)'?{{
+
/-- End of testinput4 --/
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 340a949..ffd02ea 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1273,4 +1273,12 @@ No match
"[\S\V\H]"8
+/\C\X*QT/8
+ Ӆ\x0aT
+No match
+
+/\C(\W?ſ)'?{{/8
+ \\C(\\W?ſ)'?{{
+No match
+
/-- End of testinput4 --/
--
2.1.0

View File

@ -40,6 +40,9 @@ Patch9: pcre-8.36-Fix-pcregrep-loop-when-K-is-used-in-a-lookbehind-ass.patch
# Fix pcretest loop when \K is used in a lookbehind assertion, bug #1210423,
# in upstream after 8.36
Patch10: pcre-8.36-Fix-pcretest-loop-for-K-in-lookbehind-assertion.patch
# Fix backtracking for \C\X* in UTF-8 mode, bug #1210576,
# in upstream after 8.36
Patch11: pcre-8.36-Fix-backtracking-bug-for-C-X-in-UTF-mode.patch
BuildRequires: readline-devel
# New libtool to get rid of rpath
BuildRequires: autoconf, automake, libtool
@ -90,6 +93,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
%patch8 -p1 -b .mutual_recursion_in_assertion
%patch9 -p1 -b .pcregrep_k_loop
%patch10 -p1 -b .pcretest_k_loop
%patch11 -p1 -b .backtracking_cx8
# Because of rpath patch
libtoolize --copy --force && autoreconf -vif
# One contributor's name is non-UTF-8
@ -170,6 +174,7 @@ make %{?_smp_mflags} check VERBOSE=yes
(bug #1210417)
- Fix pcregrep loop when \K is used in a lookbehind assertion (bug #1210423)
- Fix pcretest loop when \K is used in a lookbehind assertion (bug #1210423)
- Fix backtracking for \C\X* in UTF-8 mode (bug #1210576)
* Thu Mar 26 2015 Petr Pisar <ppisar@redhat.com> - 8.36-4
- Fix computing size of JIT read-only data (bug #1206131)