From da9e61642f795d859ef94e1e7a1f2b93489f915a Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 18 Jun 2014 16:48:57 +0000 Subject: [PATCH] Fix not including VT in starting characters for \s. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1486 2f5784b3-3f2a-0410-8824-cb99058d5e15 Signed-off-by: Petr Písař Petr Pisar: Ported to 8.35. Signed-off-by: Petr Písař --- pcre_study.c | 13 +++---------- testdata/testinput1 | 3 +++ testdata/testoutput1 | 4 ++++ testdata/testoutput15 | 20 ++++++++++---------- testdata/testoutput18-16 | 30 +++++++++++++++--------------- testdata/testoutput18-32 | 30 +++++++++++++++--------------- 6 files changed, 50 insertions(+), 50 deletions(-) diff --git a/pcre_study.c b/pcre_study.c index ab9510e..cb6c424 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -1106,24 +1106,17 @@ do try_next = FALSE; break; - /* The cbit_space table has vertical tab as whitespace; we have to - ensure it is set as not whitespace. Luckily, the code value is the same - (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */ + /* The cbit_space table has vertical tab as whitespace; we no longer + have to play fancy tricks because Perl added VT to its whitespace at + release 5.18. PCRE added it at release 8.34. */ case OP_NOT_WHITESPACE: set_nottype_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] |= 0x08; try_next = FALSE; break; - /* The cbit_space table has vertical tab as whitespace; we have to not - set it from the table. Luckily, the code value is the same (0x0b) in - ASCII and EBCDIC, so we can just adjust the appropriate bit. */ - case OP_WHITESPACE: - c = start_bits[1]; /* Save in case it was already set */ set_type_bits(start_bits, cbit_space, table_limit, cd); - start_bits[1] = (start_bits[1] & ~0x08) | c; try_next = FALSE; break; diff --git a/testdata/testinput1 b/testdata/testinput1 index 7b36360..b68e574 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5666,4 +5666,7 @@ AbcdCBefgBhiBqz /(a\Kb)*/+ ababc +/\sabc/ + \x{0b}abc + /-- End of testinput1 --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 4dafc04..e0e5f2c 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9313,4 +9313,8 @@ No match 0+ c 1: ab +/\sabc/ + \x{0b}abc + 0: \x0babc + /-- End of testinput1 --/ diff --git a/testdata/testoutput15 b/testdata/testoutput15 index 5af369d..bad2807 100644 --- a/testdata/testoutput15 +++ b/testdata/testoutput15 @@ -871,7 +871,7 @@ Options: utf No first char Need char = 'x' Subject length lower bound = 5 -Starting chars: \x09 \x0a \x0c \x0d \x20 \xc2 +Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \xc2 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ @@ -883,15 +883,15 @@ Options: utf No first char Need char = ' ' Subject length lower bound = 3 -Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ - A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 - \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 - \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 - \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 - \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff \x{a2} \x{84} 0: \x{a2} \x{84} A Z diff --git a/testdata/testoutput18-16 b/testdata/testoutput18-16 index a196205..1ef8704 100644 --- a/testdata/testoutput18-16 +++ b/testdata/testoutput18-16 @@ -752,7 +752,7 @@ Options: utf No first char Need char = 'x' Subject length lower bound = 5 -Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 +Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ @@ -764,20 +764,20 @@ Options: utf No first char Need char = ' ' Subject length lower bound = 3 -Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ - A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 - \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 - \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 - \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 - \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 - \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf - \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee - \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd - \xfe \xff +Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff \x{a2} \x{84} 0: \x{a2} \x{84} A Z diff --git a/testdata/testoutput18-32 b/testdata/testoutput18-32 index 1525994..622ba64 100644 --- a/testdata/testoutput18-32 +++ b/testdata/testoutput18-32 @@ -749,7 +749,7 @@ Options: utf No first char Need char = 'x' Subject length lower bound = 5 -Starting chars: \x09 \x0a \x0c \x0d \x20 \x85 \xa0 +Starting chars: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ @@ -761,20 +761,20 @@ Options: utf No first char Need char = ' ' Subject length lower bound = 3 -Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e - \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d - \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ - A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e - f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 - \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 - \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 - \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 - \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 - \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 - \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf - \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee - \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd - \xfe \xff +Starting chars: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 + \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff \x{a2} \x{84} 0: \x{a2} \x{84} A Z -- 1.9.3