Fix not including VT in starting characters for \s if pcre_study() is used
This commit is contained in:
parent
f241b20b12
commit
fc2aeac3d6
@ -0,0 +1,217 @@
|
||||
From da9e61642f795d859ef94e1e7a1f2b93489f915a Mon Sep 17 00:00:00 2001
|
||||
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
|
||||
Date: Wed, 18 Jun 2014 16:48:57 +0000
|
||||
Subject: [PATCH] Fix not including VT in starting characters for \s.
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1486 2f5784b3-3f2a-0410-8824-cb99058d5e15
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
|
||||
Petr Pisar: Ported to 8.35.
|
||||
|
||||
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||
---
|
||||
pcre_study.c | 13 +++----------
|
||||
testdata/testinput1 | 3 +++
|
||||
testdata/testoutput1 | 4 ++++
|
||||
testdata/testoutput15 | 20 ++++++++++----------
|
||||
testdata/testoutput18-16 | 30 +++++++++++++++---------------
|
||||
testdata/testoutput18-32 | 30 +++++++++++++++---------------
|
||||
6 files changed, 50 insertions(+), 50 deletions(-)
|
||||
|
||||
diff --git a/pcre_study.c b/pcre_study.c
|
||||
index ab9510e..cb6c424 100644
|
||||
--- a/pcre_study.c
|
||||
+++ b/pcre_study.c
|
||||
@@ -1106,24 +1106,17 @@ do
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
- /* The cbit_space table has vertical tab as whitespace; we have to
|
||||
- ensure it is set as not whitespace. Luckily, the code value is the same
|
||||
- (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
||||
+ /* The cbit_space table has vertical tab as whitespace; we no longer
|
||||
+ have to play fancy tricks because Perl added VT to its whitespace at
|
||||
+ release 5.18. PCRE added it at release 8.34. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
|
||||
- start_bits[1] |= 0x08;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
- /* The cbit_space table has vertical tab as whitespace; we have to not
|
||||
- set it from the table. Luckily, the code value is the same (0x0b) in
|
||||
- ASCII and EBCDIC, so we can just adjust the appropriate bit. */
|
||||
-
|
||||
case OP_WHITESPACE:
|
||||
- c = start_bits[1]; /* Save in case it was already set */
|
||||
set_type_bits(start_bits, cbit_space, table_limit, cd);
|
||||
- start_bits[1] = (start_bits[1] & ~0x08) | c;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
diff --git a/testdata/testinput1 b/testdata/testinput1
|
||||
index 7b36360..b68e574 100644
|
||||
--- a/testdata/testinput1
|
||||
+++ b/testdata/testinput1
|
||||
@@ -5666,4 +5666,7 @@ AbcdCBefgBhiBqz
|
||||
/(a\Kb)*/+
|
||||
ababc
|
||||
|
||||
+/\sabc/
|
||||
+ \x{0b}abc
|
||||
+
|
||||
/-- End of testinput1 --/
|
||||
diff --git a/testdata/testoutput1 b/testdata/testoutput1
|
||||
index 4dafc04..e0e5f2c 100644
|
||||
--- a/testdata/testoutput1
|
||||
+++ b/testdata/testoutput1
|
||||
@@ -9313,4 +9313,8 @@ No match
|
||||
0+ c
|
||||
1: ab
|
||||
|
||||
+/\sabc/
|
||||
+ \x{0b}abc
|
||||
+ 0: \x0babc
|
||||
+
|
||||
/-- End of testinput1 --/
|
||||
diff --git a/testdata/testoutput15 b/testdata/testoutput15
|
||||
index 5af369d..bad2807 100644
|
||||
--- a/testdata/testoutput15
|
||||
+++ b/testdata/testoutput15
|
||||
@@ -871,7 +871,7 @@ Options: utf
|
||||
No first char
|
||||
Need char = 'x'
|
||||
Subject length lower bound = 5
|
||||
-Starting chars: \x09 \x0a \x0c \x0d \x20 \xc2
|
||||
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
0: \x{85}xxx\x{a0}
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
@@ -883,15 +883,15 @@ Options: utf
|
||||
No first char
|
||||
Need char = ' '
|
||||
Subject length lower bound = 3
|
||||
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
|
||||
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
|
||||
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
|
||||
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3
|
||||
- \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
|
||||
- \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
|
||||
- \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
|
||||
- \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
|
||||
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
|
||||
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
|
||||
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
|
||||
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
|
||||
+ \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
|
||||
+ \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
|
||||
+ \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
|
||||
+ \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
\x{a2} \x{84}
|
||||
0: \x{a2} \x{84}
|
||||
A Z
|
||||
diff --git a/testdata/testoutput18-16 b/testdata/testoutput18-16
|
||||
index a196205..1ef8704 100644
|
||||
--- a/testdata/testoutput18-16
|
||||
+++ b/testdata/testoutput18-16
|
||||
@@ -752,7 +752,7 @@ Options: utf
|
||||
No first char
|
||||
Need char = 'x'
|
||||
Subject length lower bound = 5
|
||||
-Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
|
||||
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
0: \x{85}xxx\x{a0}
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
@@ -764,20 +764,20 @@ Options: utf
|
||||
No first char
|
||||
Need char = ' '
|
||||
Subject length lower bound = 3
|
||||
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
|
||||
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
|
||||
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
|
||||
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
|
||||
- \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
|
||||
- \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
|
||||
- \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
|
||||
- \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
|
||||
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
|
||||
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
|
||||
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
|
||||
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
|
||||
- \xfe \xff
|
||||
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
|
||||
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
|
||||
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
|
||||
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
|
||||
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
|
||||
+ \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
|
||||
+ \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
|
||||
+ \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
|
||||
+ \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
|
||||
+ \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
|
||||
+ \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
|
||||
+ \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
|
||||
+ \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
|
||||
+ \xff
|
||||
\x{a2} \x{84}
|
||||
0: \x{a2} \x{84}
|
||||
A Z
|
||||
diff --git a/testdata/testoutput18-32 b/testdata/testoutput18-32
|
||||
index 1525994..622ba64 100644
|
||||
--- a/testdata/testoutput18-32
|
||||
+++ b/testdata/testoutput18-32
|
||||
@@ -749,7 +749,7 @@ Options: utf
|
||||
No first char
|
||||
Need char = 'x'
|
||||
Subject length lower bound = 5
|
||||
-Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0
|
||||
+Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
|
||||
AB\x{85}xxx\x{a0}XYZ
|
||||
0: \x{85}xxx\x{a0}
|
||||
AB\x{a0}xxx\x{85}XYZ
|
||||
@@ -761,20 +761,20 @@ Options: utf
|
||||
No first char
|
||||
Need char = ' '
|
||||
Subject length lower bound = 3
|
||||
-Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
|
||||
- \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
||||
- \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
|
||||
- A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
|
||||
- f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83
|
||||
- \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
|
||||
- \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3
|
||||
- \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2
|
||||
- \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1
|
||||
- \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
|
||||
- \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
|
||||
- \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
|
||||
- \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
|
||||
- \xfe \xff
|
||||
+Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
|
||||
+ \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
|
||||
+ \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
|
||||
+ D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
|
||||
+ i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
|
||||
+ \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
|
||||
+ \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
|
||||
+ \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
|
||||
+ \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
|
||||
+ \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
|
||||
+ \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
|
||||
+ \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
|
||||
+ \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
|
||||
+ \xff
|
||||
\x{a2} \x{84}
|
||||
0: \x{a2} \x{84}
|
||||
A Z
|
||||
--
|
||||
1.9.3
|
||||
|
@ -20,6 +20,9 @@ Patch2: pcre-8.35-Do-not-rely-on-wrapping-signed-integer-while-parsein.patch
|
||||
# circumflex in multiline UTF mode, bug #1110620, upstream bug #1492,
|
||||
# in upstream after 8.35
|
||||
Patch3: pcre-8.35-Fix-bad-starting-data-when-char-with-more-than-one-o.patch
|
||||
# Fix not including VT in starting characters for \s if pcre_study() is used,
|
||||
# bug #1111045, upstream bug #1493, in upstream after 8.35
|
||||
Patch4: pcre-8.35-Fix-not-including-VT-in-starting-characters-for-s.patch
|
||||
BuildRequires: readline-devel
|
||||
# New libtool to get rid of rpath
|
||||
BuildRequires: autoconf, automake, libtool
|
||||
@ -63,6 +66,7 @@ Utilities demonstrating PCRE capabilities like pcregrep or pcretest.
|
||||
%patch1 -p1 -b .terminated_typos
|
||||
%patch2 -p1 -b .gcc49
|
||||
%patch3 -p1 -b .starting_data
|
||||
%patch4 -p1 -b .studied_vt
|
||||
# Because of rpath patch
|
||||
libtoolize --copy --force && autoreconf -vif
|
||||
# One contributor's name is non-UTF-8
|
||||
@ -133,6 +137,8 @@ make %{?_smp_mflags} check
|
||||
* Thu Jun 19 2014 Petr Pisar <ppisar@redhat.com> - 8.35-3
|
||||
- Fix bad starting data when char with more than one other case follows
|
||||
circumflex in multiline UTF mode (bug #1110620)
|
||||
- Fix not including VT in starting characters for \s if pcre_study() is used
|
||||
(bug #1111045)
|
||||
|
||||
* Fri Jun 06 2014 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 8.35-2.1
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_21_Mass_Rebuild
|
||||
|
Loading…
Reference in New Issue
Block a user