From aefe0df10c5ed528a7034affaf69227df0058a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= Date: Mon, 21 Sep 2020 12:49:55 +0200 Subject: [PATCH] Fix matching a character set when JIT is enabled and both Unicode script and Unicode class are present --- ...haracter-set-matching-when-JIT-is-en.patch | 94 +++++++++++++++++++ pcre2.spec | 10 +- 2 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch diff --git a/pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch b/pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch new file mode 100644 index 0000000..fbec5fc --- /dev/null +++ b/pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch @@ -0,0 +1,94 @@ +From 5002a59a8289027b8a88c4933077a9b66e839d6c Mon Sep 17 00:00:00 2001 +From: zherczeg +Date: Sat, 19 Sep 2020 03:49:32 +0000 +Subject: [PATCH] Fixed a bug in character set matching when JIT is enabled. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1273 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Ported to 10.35. +--- + src/pcre2_jit_compile.c | 36 ++++++++++++++++++++++++++++-------- + src/pcre2_jit_test.c | 1 + + +diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c +index edf64d5..04f0278 100644 +--- a/src/pcre2_jit_compile.c ++++ b/src/pcre2_jit_compile.c +@@ -7672,25 +7672,43 @@ if (needstype || needsscript) + } + + cc = ccbegin; +- } + +- if (needschar) +- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); ++ if (needstype) ++ { ++ /* TMP2 has already been shifted by 2 */ ++ if (!needschar) ++ { ++ OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); ++ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); ++ ++ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); ++ } ++ else ++ { ++ OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); ++ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + +- if (needstype) ++ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); ++ OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); ++ typereg = RETURN_ADDR; ++ } ++ } ++ else if (needschar) ++ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); ++ } ++ else if (needstype) + { ++ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); ++ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); ++ + if (!needschar) + { +- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); +- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + } + else + { +- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); +- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); +@@ -7698,6 +7716,8 @@ if (needstype || needsscript) + typereg = RETURN_ADDR; + } + } ++ else if (needschar) ++ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + } + #endif /* SUPPORT_UNICODE */ + +diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c +index b7856ad..d935887 100644 +--- a/src/pcre2_jit_test.c ++++ b/src/pcre2_jit_test.c +@@ -409,6 +409,7 @@ static struct regression_test_case regression_test_cases[] = { + { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" }, + { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, + { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" }, ++ { MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " }, + + /* Possible empty brackets. */ + { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, +-- +2.25.4 + diff --git a/pcre2.spec b/pcre2.spec index d21881e..549b72f 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -9,7 +9,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.35 -Release: %{?rcversion:0.}6%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -75,6 +75,9 @@ Patch7: pcre2-10.35-Update-pcre2test-to-check-delimiters-after-perltest-.pat # Fix a mismatch when caselessly searching in an invalid UTF-8 text and a start # optimization is enabled, upstream bug #2642, in upstream after 10.35 Patch8: pcre2-10.35-Fix-Bugzilla-2642-no-match-bug-in-8-bit-mode-for-cas.patch +# Fix matching a character set when JIT is enabled and both Unicode script and +# Unicode class are present, upstream bug #2644, in upstream after 10.35 +Patch9: pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -174,6 +177,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch6 -p1 %patch7 -p1 %patch8 -p1 +%patch9 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -287,6 +291,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Mon Sep 21 2020 Petr Pisar - 10.35-7 +- Fix matching a character set when JIT is enabled and both Unicode script and + Unicode class are present (upstream bug #2644) + * Wed Sep 16 2020 Petr Pisar - 10.35-6 - Fix escaping test data and only allow slash delimiter after perltest pragma (upstream bug #2641)