perl/perl-5.16.1-PATCH-perl-114220-h-not-equiv-to-h.patch

84 lines
2.9 KiB
Diff
Raw Normal View History

From ee895e3e46998560ad9b18ffcaf5852b1fbc5403 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 21 Jul 2012 12:12:33 -0600
Subject: [PATCH] PATCH: [perl #114220] /\h/ not equiv to /[\h]/
\h matches the No-Break space even under /d. It is the only
(non-complemented) Posix-like character class that has matches under /d
in the Latin1 range above ASCII. A special case is made for it and for \H,
to make sure that both match the correct code points.
---
regcomp.c | 27 ++++++++++++++++++++++-----
t/re/re_tests | 5 +++++
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/regcomp.c b/regcomp.c
index 143f349..2254159 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10885,6 +10885,7 @@ parseit:
* A similar issue a little earlier when switching on value.
* --jhi */
switch ((I32)namedclass) {
+ int i; /* loop counter */
case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */
DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties,
@@ -10957,16 +10958,32 @@ parseit:
PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv);
break;
case ANYOF_HORIZWS:
- /* For these, we use the nonbitmap, as /d doesn't make a
- * difference in what these match. There would be problems
- * if these characters had folds other than themselves, as
- * nonbitmap is subject to folding. It turns out that \h
- * is just a synonym for XPosixBlank */
+ /* NBSP matches this, and needs to be added unconditionally
+ * to the bit map as it matches even under /d, unlike all
+ * the rest of the Posix-like classes (\v doesn't have any
+ * matches in the Latin1 range, so it is unaffected).
+ * Otherwise, we use the nonbitmap, as /d doesn't make a
+ * difference in what these match. It turns out that \h is
+ * just a synonym for XPosixBlank */
_invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap);
+ stored += set_regclass_bit(pRExC_state, ret,
+ UNI_TO_NATIVE(0xA0),
+ &l1_fold_invlist,
+ &unicode_alternate);
+
break;
case ANYOF_NHORIZWS:
_invlist_union_complement_2nd(nonbitmap,
PL_XPosixBlank, &nonbitmap);
+ for (i = 128; i < 256; i++) {
+ if (i == 0xA0) {
+ continue;
+ }
+ stored += set_regclass_bit(pRExC_state, ret,
+ UNI_TO_NATIVE(i),
+ &l1_fold_invlist,
+ &unicode_alternate);
+ }
break;
case ANYOF_LOWER:
case ANYOF_NLOWER:
diff --git a/t/re/re_tests b/t/re/re_tests
index 4d78a6a..cfc813f 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1595,6 +1595,11 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
/[[:word:]]/ \x{2c1} y - -
/[[:word:]]/ \x{2c2} n - -
+# [perl #114220]
+/[\h]/ \x{A0} y $& \xA0
+/[\H]/ \x{BF} y $& \xBF
+/[\H]/ \x{A0} n - -
+/[\H]/ \x{A1} y $& \xA1
# vim: softtabstop=0 noexpandtab
--
1.7.11.4