Fix optimized caseless matching of non-ASCII characters in assertions

This commit is contained in:
Petr Písař 2019-11-18 12:47:38 +01:00
parent 821dd3e84c
commit 1ee52c43e8
2 changed files with 90 additions and 1 deletions

View File

@ -0,0 +1,81 @@
From 3c7295bb56a7944fe5358cb2eab2ad68d35a3aa1 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 16 Nov 2019 17:30:07 +0000
Subject: [PATCH] Fix sometimes failing caseless non-ASCII matching in
assertion.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1185 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.34-RC2.
---
src/pcre2_compile.c | 13 +++++++++++++
testdata/testinput4 | 8 ++++++++
testdata/testoutput4 | 12 ++++++++++++
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 3204973..800b61b 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -8741,6 +8741,19 @@ do {
case OP_MINPLUSI:
case OP_POSPLUSI:
if (inassert == 0) return 0;
+
+ /* If the character is more than one code unit long, its first code unit
+ cannot be recorded as a caseless first code unit: the other-case form of the
+ character may start with a different code unit. Give up on the optimization. */
+
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (scode[1] >= 0x80) return 0;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0;
+#endif
+#endif
+
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
diff --git a/testdata/testinput4 b/testdata/testinput4
index f3d498c..0871835 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2483,4 +2483,12 @@
/\X*/
\xF3aaa\xE4\xEA\xEB\xFEa
+/Я/i,utf
+ \x{42f}
+ \x{44f}
+
+/(?=Я)/i,utf
+ \x{42f}
+ \x{44f}
+
# End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 53926ed..2c8037b 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -4016,4 +4016,16 @@ No match
\xF3aaa\xE4\xEA\xEB\xFEa
0: \xf3aaa\xe4\xea\xeb\xfea
+/Я/i,utf
+ \x{42f}
+ 0: \x{42f}
+ \x{44f}
+ 0: \x{44f}
+
+/(?=Я)/i,utf
+ \x{42f}
+ 0:
+ \x{44f}
+ 0:
+
# End of testinput4
--
2.21.0

View File

@ -9,7 +9,7 @@
%global rcversion RC2
Name: pcre2
Version: 10.34
Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
# the library: BSD with exceptions
@ -55,6 +55,9 @@ Patch0: pcre2-10.10-Fix-multilib.patch
# in upstream after 10.34-RC2
# <https://lists.exim.org/lurker/message/20191111.150436.ac8d8581.en.html>
Patch1: pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch
# Fix optimized caseless matching of non-ASCII characters in assertions,
# upstream bug #2466, in upstream after 10.34-RC2
Patch2: pcre2-10.34-RC2-Fix-sometimes-failing-caseless-non-ASCII-matching-in.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: coreutils
@ -134,6 +137,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%setup -q -n %{name}-%{myversion}
%patch0 -p1
%patch1 -p0
%patch2 -p1
# Because of multilib patch
libtoolize --copy --force
autoreconf -vif
@ -231,6 +235,10 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.*
%changelog
* Mon Nov 18 2019 Petr Pisar <ppisar@redhat.com> - 10.34-0.2.RC2
- Fix optimized caseless matching of non-ASCII characters in assertions
(upstream bug #2466)
* Thu Nov 07 2019 Petr Pisar <ppisar@redhat.com> - 10.34-0.1.RC2
- 10.34-RC2 bump
- Fix an infinite loop in 64-bit ARM JIT with NEON instructions