Fix matching a character set when JIT is enabled and both Unicode script and Unicode class are present

This commit is contained in:
Petr Písař 2020-09-21 12:49:55 +02:00
parent 3841a80996
commit aefe0df10c
2 changed files with 103 additions and 1 deletions

View File

@@ -0,0 +1,94 @@
From 5002a59a8289027b8a88c4933077a9b66e839d6c Mon Sep 17 00:00:00 2001
From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 19 Sep 2020 03:49:32 +0000
Subject: [PATCH] Fixed a bug in character set matching when JIT is enabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1273 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.35.
---
src/pcre2_jit_compile.c | 36 ++++++++++++++++++++++++++++--------
src/pcre2_jit_test.c | 1 +
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index edf64d5..04f0278 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -7672,25 +7672,43 @@ if (needstype || needsscript)
}
cc = ccbegin;
- }
- if (needschar)
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ if (needstype)
+ {
+ /* TMP2 has already been shifted by 2 */
+ if (!needschar)
+ {
+ OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ }
+ else
+ {
+ OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- if (needstype)
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ typereg = RETURN_ADDR;
+ }
+ }
+ else if (needschar)
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+ }
+ else if (needstype)
{
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+
if (!needschar)
{
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
}
else
{
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
@@ -7698,6 +7716,8 @@ if (needstype || needsscript)
typereg = RETURN_ADDR;
}
}
+ else if (needschar)
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
}
#endif /* SUPPORT_UNICODE */
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b7856ad..d935887 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -409,6 +409,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
+ { MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
/* Possible empty brackets. */
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
--
2.25.4

View File

@@ -9,7 +9,7 @@
#%%global rcversion RC1
Name: pcre2
Version: 10.35
Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist}
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library
# the library: BSD with exceptions
@@ -75,6 +75,9 @@ Patch7: pcre2-10.35-Update-pcre2test-to-check-delimiters-after-perltest-.pat
# Fix a mismatch when caselessly searching in an invalid UTF-8 text and a start
# optimization is enabled, upstream bug #2642, in upstream after 10.35
Patch8: pcre2-10.35-Fix-Bugzilla-2642-no-match-bug-in-8-bit-mode-for-cas.patch
# Fix matching a character set when JIT is enabled and both Unicode script and
# Unicode class are present, upstream bug #2644, in upstream after 10.35
Patch9: pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: coreutils
@@ -174,6 +177,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
# Because of multilib patch
libtoolize --copy --force
autoreconf -vif
@@ -287,6 +291,10 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.*
%changelog
* Mon Sep 21 2020 Petr Pisar <ppisar@redhat.com> - 10.35-7
- Fix matching a character set when JIT is enabled and both Unicode script and
Unicode class are present (upstream bug #2644)
* Wed Sep 16 2020 Petr Pisar <ppisar@redhat.com> - 10.35-6
- Fix escaping test data and only allow slash delimiter after perltest pragma
(upstream bug #2641)