Fix firstchar bitmap under UTF-8 with prefix optimization
This commit is contained in:
parent
dee7e92b27
commit
281d2faaad
@ -0,0 +1,97 @@
|
|||||||
|
From 1b90dad20879f0e7a3eced5da0e0aacda93708ed Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yves Orton <demerphq@gmail.com>
|
||||||
|
Date: Thu, 27 Oct 2016 13:52:24 +0200
|
||||||
|
Subject: [PATCH] regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8
|
||||||
|
with prefix optimisation
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Ported to 5.24.0:
|
||||||
|
|
||||||
|
commit da42332b10691ba7af7550035ffc7f46c87e4e66
|
||||||
|
Author: Yves Orton <demerphq@gmail.com>
|
||||||
|
Date: Thu Oct 27 13:52:24 2016 +0200
|
||||||
|
|
||||||
|
regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8 with prefix optimisation
|
||||||
|
|
||||||
|
The trie code contains a number of sub optimisations, one of which
|
||||||
|
extracts common prefixes from alternations, and another which is a
|
||||||
|
bitmap of the possible matching first chars.
|
||||||
|
|
||||||
|
The bitmap needs to contain the possible first octets of the string
|
||||||
|
which the trie can match, and for codepoints which might have a different
|
||||||
|
first octet under utf8 or non-utf8 we need to register BOTH octets.
|
||||||
|
|
||||||
|
So for instance in the pattern (?:a|a\x{E4}) we should restructure this
|
||||||
|
as a(|\x{E4}), and the bitmap for the trie should contain both \x{E4} AND
|
||||||
|
\x{C3} as \x{C3} is the first byte of \x{E4} expressed as utf8.
|
||||||
|
|
||||||
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
||||||
|
---
|
||||||
|
regcomp.c | 14 ++++++++++++++
|
||||||
|
t/re/pat.t | 9 ++++++++-
|
||||||
|
2 files changed, 22 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/regcomp.c b/regcomp.c
|
||||||
|
index 7462885..bcb8db5 100644
|
||||||
|
--- a/regcomp.c
|
||||||
|
+++ b/regcomp.c
|
||||||
|
@@ -3272,6 +3272,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
|
||||||
|
TRIE_BITMAP_SET(trie,*ch);
|
||||||
|
if ( folder )
|
||||||
|
TRIE_BITMAP_SET(trie, folder[ *ch ]);
|
||||||
|
+ if ( !UTF ) {
|
||||||
|
+ /* store first byte of utf8 representation of
|
||||||
|
+ variant codepoints */
|
||||||
|
+ if (! UVCHR_IS_INVARIANT(*ch)) {
|
||||||
|
+ TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
DEBUG_OPTIMISE_r(
|
||||||
|
Perl_re_printf( aTHX_ "%s", (char*)ch)
|
||||||
|
);
|
||||||
|
@@ -3280,6 +3287,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
|
||||||
|
TRIE_BITMAP_SET(trie,*ch);
|
||||||
|
if ( folder )
|
||||||
|
TRIE_BITMAP_SET(trie,folder[ *ch ]);
|
||||||
|
+ if ( !UTF ) {
|
||||||
|
+ /* store first byte of utf8 representation of
|
||||||
|
+ variant codepoints */
|
||||||
|
+ if (! UVCHR_IS_INVARIANT(*ch)) {
|
||||||
|
+ TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
|
||||||
|
}
|
||||||
|
idx = ofs;
|
||||||
|
diff --git a/t/re/pat.t b/t/re/pat.t
|
||||||
|
index 295a9f7..4aa77cf 100644
|
||||||
|
--- a/t/re/pat.t
|
||||||
|
+++ b/t/re/pat.t
|
||||||
|
@@ -23,7 +23,7 @@ BEGIN {
|
||||||
|
skip_all_without_unicode_tables();
|
||||||
|
}
|
||||||
|
|
||||||
|
-plan tests => 789; # Update this when adding/deleting tests.
|
||||||
|
+plan tests => 791; # Update this when adding/deleting tests.
|
||||||
|
|
||||||
|
run_tests() unless caller;
|
||||||
|
|
||||||
|
@@ -1758,6 +1758,13 @@ EOP
|
||||||
|
fresh_perl_is($code, $expect, {}, "$bug - $test_name" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ {
|
||||||
|
+ my $str = "a\xE4";
|
||||||
|
+ ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - latin1 case" );
|
||||||
|
+ utf8::upgrade($str);
|
||||||
|
+ ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - utf8 case" );
|
||||||
|
+ }
|
||||||
|
} # End of sub run_tests
|
||||||
|
|
||||||
|
1;
|
||||||
|
--
|
||||||
|
2.7.4
|
||||||
|
|
@ -206,6 +206,11 @@ Patch54: perl-5.24.0-perl-129350-anchored-floating-substrings-must-be-utf
|
|||||||
# Fix parsing perl options in shell bang line, RT#129336,
|
# Fix parsing perl options in shell bang line, RT#129336,
|
||||||
# in upstream after 5.25.5
|
# in upstream after 5.25.5
|
||||||
Patch55: perl-5.24.0-rt-129336-perl-i-u-erroneously-interpreted-as-u.patch
|
Patch55: perl-5.24.0-rt-129336-perl-i-u-erroneously-interpreted-as-u.patch
|
||||||
|
|
||||||
|
# Fix firstchar bitmap under UTF-8 with prefix optimization, RT#129950,
|
||||||
|
# in upstream after 5.25.6
|
||||||
|
Patch56: perl-5.24.0-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch
|
||||||
|
|
||||||
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
|
||||||
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
|
||||||
|
|
||||||
@ -2884,6 +2889,7 @@ Perl extension for Version Objects
|
|||||||
%patch53 -p1
|
%patch53 -p1
|
||||||
%patch54 -p1
|
%patch54 -p1
|
||||||
%patch55 -p1
|
%patch55 -p1
|
||||||
|
%patch56 -p1
|
||||||
%patch200 -p1
|
%patch200 -p1
|
||||||
%patch201 -p1
|
%patch201 -p1
|
||||||
|
|
||||||
@ -2930,6 +2936,7 @@ perl -x patchlevel.h \
|
|||||||
'Fedora Patch53: Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)' \
|
'Fedora Patch53: Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)' \
|
||||||
'Fedora Patch54: Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)' \
|
'Fedora Patch54: Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)' \
|
||||||
'Fedora Patch55: Fix parsing perl options in shell bang line (RT#129336)' \
|
'Fedora Patch55: Fix parsing perl options in shell bang line (RT#129336)' \
|
||||||
|
'Fedora Patch56: Fix firstchar bitmap under UTF-8 with prefix optimization (RT#129950)' \
|
||||||
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
|
||||||
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
|
||||||
%{nil}
|
%{nil}
|
||||||
@ -5214,6 +5221,7 @@ popd
|
|||||||
- Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)
|
- Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)
|
||||||
- Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)
|
- Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)
|
||||||
- Fix parsing perl options in shell bang line (RT#129336)
|
- Fix parsing perl options in shell bang line (RT#129336)
|
||||||
|
- Fix firstchar bitmap under UTF-8 with prefix optimization (RT#129950)
|
||||||
|
|
||||||
* Fri Sep 02 2016 Petr Pisar <ppisar@redhat.com> - 4:5.24.0-378
|
* Fri Sep 02 2016 Petr Pisar <ppisar@redhat.com> - 4:5.24.0-378
|
||||||
- perl-core depends on Parse::CPAN::Meta module instead of package name to allow
|
- perl-core depends on Parse::CPAN::Meta module instead of package name to allow
|
||||||
|
Loading…
Reference in New Issue
Block a user