From ee895e3e46998560ad9b18ffcaf5852b1fbc5403 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 21 Jul 2012 12:12:33 -0600 Subject: [PATCH] PATCH: [perl #114220] /\h/ not equiv to /[\h]/ \h matches the No-Break space even under /d. It is the only (non-complemented) Posix-like character class that has matches under /d in the Latin1 range above ASCII. A special case is made for it and for \H, to make sure they match the correct code points. --- regcomp.c | 27 ++++++++++++++++++++++----- t/re/re_tests | 5 +++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/regcomp.c b/regcomp.c index 143f349..2254159 100644 --- a/regcomp.c +++ b/regcomp.c @@ -10885,6 +10885,7 @@ parseit: * A similar issue a little earlier when switching on value. * --jhi */ switch ((I32)namedclass) { + int i; /* loop counter */ case ANYOF_ALNUMC: /* C's alnum, in contrast to \w */ DO_POSIX_LATIN1_ONLY_KNOWN(ret, namedclass, properties, @@ -10957,16 +10958,32 @@ parseit: PL_PosixGraph, PL_L1PosixGraph, "XPosixGraph", listsv); break; case ANYOF_HORIZWS: - /* For these, we use the nonbitmap, as /d doesn't make a - * difference in what these match. There would be problems - * if these characters had folds other than themselves, as - * nonbitmap is subject to folding. It turns out that \h - * is just a synonym for XPosixBlank */ + /* NBSP matches this, and needs to be added unconditionally + * to the bit map as it matches even under /d, unlike all + * the rest of the Posix-like classes (\v doesn't have any + * matches in the Latin1 range, so it is unaffected). + * Otherwise, we use the nonbitmap, as /d doesn't make a + * difference in what these match. 
It turns out that \h is + * just a synonym for XPosixBlank */ _invlist_union(nonbitmap, PL_XPosixBlank, &nonbitmap); + stored += set_regclass_bit(pRExC_state, ret, + UNI_TO_NATIVE(0xA0), + &l1_fold_invlist, + &unicode_alternate); + break; case ANYOF_NHORIZWS: _invlist_union_complement_2nd(nonbitmap, PL_XPosixBlank, &nonbitmap); + for (i = 128; i < 256; i++) { + if (i == 0xA0) { + continue; + } + stored += set_regclass_bit(pRExC_state, ret, + UNI_TO_NATIVE(i), + &l1_fold_invlist, + &unicode_alternate); + } break; case ANYOF_LOWER: case ANYOF_NLOWER: diff --git a/t/re/re_tests b/t/re/re_tests index 4d78a6a..cfc813f 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1595,6 +1595,11 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer /[[:word:]]/ \x{2c1} y - - /[[:word:]]/ \x{2c2} n - - +# [perl #114220] +/[\h]/ \x{A0} y $& \xA0 +/[\H]/ \x{BF} y $& \xBF +/[\H]/ \x{A0} n - - +/[\H]/ \x{A1} y $& \xA1 # vim: softtabstop=0 noexpandtab -- 1.7.11.4