From de310a916bd3581277c7f6cc20d7c4c70d6e0662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Tue, 3 Jan 2017 15:34:41 +0100 Subject: [PATCH] Fix compiling a class with UCP and without UTF (port to 10.22) --- ...en-UCP-but-not-UTF-was-set-and-all-w.patch | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/pcre2-10.22-Fix-class-bug-when-UCP-but-not-UTF-was-set-and-all-w.patch b/pcre2-10.22-Fix-class-bug-when-UCP-but-not-UTF-was-set-and-all-w.patch index 4c32791..6d32a6b 100644 --- a/pcre2-10.22-Fix-class-bug-when-UCP-but-not-UTF-was-set-and-all-w.patch +++ b/pcre2-10.22-Fix-class-bug-when-UCP-but-not-UTF-was-set-and-all-w.patch @@ -1,4 +1,4 @@ -From a83027bb4b195c879d504da051571f22a5ac7ca3 Mon Sep 17 00:00:00 2001 +From b3343e2c2c77b85f841a7af5e4121dab11692065 Mon Sep 17 00:00:00 2001 From: ph10 Date: Mon, 26 Dec 2016 17:11:18 +0000 Subject: [PATCH] Fix class bug when UCP but not UTF was set and all wide @@ -7,53 +7,50 @@ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@628 6239d852-aaf2-0410-a92c-79f79f948069 +Ported to 10.22: + +commit a83027bb4b195c879d504da051571f22a5ac7ca3 +Author: ph10 +Date: Mon Dec 26 17:11:18 2016 +0000 + + Fix class bug when UCP but not UTF was set and all wide characters need to b +e + included. + + git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@628 6239d852-aaf2-0410-a92c-79f79f948069 + Signed-off-by: Petr Písař --- - ChangeLog | 4 ++++ - src/pcre2_compile.c | 8 +++++++- - testdata/testinput10 | 2 ++ - testdata/testinput12 | 2 ++ - testdata/testoutput10 | 8 ++++++++ - testdata/testoutput12-16 | 8 ++++++++ - testdata/testoutput12-32 | 8 ++++++++ - 7 files changed, 39 insertions(+), 1 deletion(-) + src/pcre2_compile.c | 10 ++++++++-- + testdata/testinput10 | 2 ++ + testdata/testinput12 | 2 ++ + testdata/testoutput10 | 8 ++++++++ + testdata/testoutput12-16 | 8 ++++++++ + testdata/testoutput12-32 | 8 ++++++++ + 6 files changed, 36 insertions(+), 2 deletions(-) -diff --git a/ChangeLog b/ChangeLog -index 6156b24..d276feb 100644 ---- a/ChangeLog -+++ b/ChangeLog -@@ -255,6 +255,10 @@ This lifts a restriction on the number of branches in a group (more than about - 38. Add the "-ac" command line option to pcre2test as a synonym for "-pattern - auto_callout". - -+39. In a library with Unicode support, incorrect data was compiled for a -+pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide -+characters to match (for example, /[\s[:^ascii:]]/). -+ - - Version 10.22 29-July-2016 - -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c -index 9dc9c98..fce226b 100644 +index ae6b5e1..2c9f758 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c -@@ -4927,9 +4927,13 @@ for (;; pptr++) - automatically handled by the use of OP_CLASS or OP_NCLASS, but an - explicit range is needed for OP_XCLASS. Setting a flag here - causes the range to be generated later when it is known that -- OP_XCLASS is required. */ -+ OP_XCLASS is required. In the 8-bit library this is relevant only in -+ utf mode, since no wide characters can exist otherwise. */ +@@ -4482,10 +4482,14 @@ for (;; ptr++) + In the special case where there are no xclass items, this is + automatically handled by the use of OP_CLASS or OP_NCLASS, but an + explicit range is needed for OP_XCLASS. Setting a flag here causes +- the range to be generated later when it is known that OP_XCLASS is +- required. */ ++ the range to be generated later when it is known that ++ OP_XCLASS is required. In the 8-bit library this is relevant only in ++ utf mode, since no wide characters can exist otherwise. */ - default: + default: +#if PCRE2_CODE_UNIT_WIDTH == 8 -+ if (utf) ++ if (utf) +#endif - match_all_or_no_wide_chars |= local_negate; - break; - } -@@ -5217,6 +5221,8 @@ for (;; pptr++) + match_all_or_no_wide_chars |= local_negate; + break; + } +@@ -4993,6 +4997,8 @@ for (;; ptr++) all wide characters (depending on whether the whole class is or is not negated). This requirement is indicated by match_all_or_no_wide_chars being true. We do this by including an explicit range, which works in both cases. @@ -63,21 +60,21 @@ index 9dc9c98..fce226b 100644 When there *are* properties in a positive UTF-8 or any 16-bit or 32_bit class where \S etc is present without PCRE2_UCP, causing an extended class diff --git a/testdata/testinput10 b/testdata/testinput10 -index a1806ae..85d2005 100644 +index 4b80778..1c6134b 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 -@@ -456,4 +456,6 @@ - - /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf +@@ -454,4 +454,6 @@ + \= Expect no match + 123 +/[\s[:^ascii:]]/B,ucp + # End of testinput10 diff --git a/testdata/testinput12 b/testdata/testinput12 -index 5b29f41..c3b2bfc 100644 +index 29934ec..d851ae6 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 -@@ -358,4 +358,6 @@ +@@ -354,4 +354,6 @@ \= Expect no match 123 @@ -85,12 +82,12 @@ index 5b29f41..c3b2bfc 100644 + # End of testinput12 diff --git a/testdata/testoutput10 b/testdata/testoutput10 -index 3c35f0b..31b7d00 100644 +index 0c1e9b2..aef89ca 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 -@@ -1567,4 +1567,12 @@ No match - /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf - Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN) +@@ -1564,4 +1564,12 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1 + 123 + No match +/[\s[:^ascii:]]/B,ucp +------------------------------------------------------------------ @@ -102,10 +99,10 @@ index 3c35f0b..31b7d00 100644 + # End of testinput10 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 -index f0f2230..3b5a0cd 100644 +index 9cd6640..e2d5b9f 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 -@@ -1407,4 +1407,12 @@ Subject length lower bound = 2 +@@ -1396,4 +1396,12 @@ Subject length lower bound = 2 123 No match @@ -119,10 +116,10 @@ index f0f2230..3b5a0cd 100644 + # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 -index 3718b27..1496159 100644 +index 75a5ad7..7479a93 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 -@@ -1401,4 +1401,12 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defin +@@ -1390,4 +1390,12 @@ Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defin 123 No match