Fix the word boundary check in JIT mode when partial matching is enabled

This commit is contained in:
Petr Písař 2020-10-27 14:04:39 +01:00
parent aefe0df10c
commit 8a057ba535
2 changed files with 71 additions and 1 deletions

View File

@ -0,0 +1,63 @@
From fd5e04a35122c62584da5a9558f3b744a2d09692 Mon Sep 17 00:00:00 2001
From: zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 27 Oct 2020 08:16:04 +0000
Subject: [PATCH] Fixed a word boundary check bug in JIT when partial matching
is enabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1280 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.35.
---
src/pcre2_jit_compile.c | 2 ++
testdata/testinput10 | 4 ++++
testdata/testoutput10 | 6 ++++++
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 04f0278..1977d28 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -6571,9 +6571,11 @@ if (common->invalid_utf)
if (common->mode != PCRE2_JIT_COMPLETE)
{
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
move_back(common, NULL, TRUE);
check_start_used_ptr(common);
+ OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
}
}
diff --git a/testdata/testinput10 b/testdata/testinput10
index efd3298..53e37cb 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -613,4 +613,8 @@
/A/utf,match_invalid_utf,caseless
\xe5A
+/\bch\b/utf,match_invalid_utf
+ qchq\=ph
+ qchq\=ps
+
# End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 2a3803f..d408510 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1875,4 +1875,10 @@ Subject length lower bound = 1
\xe5A
0: A
+/\bch\b/utf,match_invalid_utf
+ qchq\=ph
+Partial match:
+ qchq\=ps
+Partial match:
+
# End of testinput10
--
2.25.4

View File

@ -9,7 +9,7 @@
#%%global rcversion RC1 #%%global rcversion RC1
Name: pcre2 Name: pcre2
Version: 10.35 Version: 10.35
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion} %global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library Summary: Perl-compatible regular expression library
# the library: BSD with exceptions # the library: BSD with exceptions
@ -78,6 +78,9 @@ Patch8: pcre2-10.35-Fix-Bugzilla-2642-no-match-bug-in-8-bit-mode-for-cas.pat
# Fix matching a character set when JIT is enabled and both Unicode script and # Fix matching a character set when JIT is enabled and both Unicode script and
# Unicode class are present, upstream bug #2644, in upstream after 10.35 # Unicode class are present, upstream bug #2644, in upstream after 10.35
Patch9: pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch Patch9: pcre2-10.35-Fixed-a-bug-in-character-set-matching-when-JIT-is-en.patch
# Fix the word boundary check in JIT mode when partial matching is enabled,
# upstream bug #2663, in upstream after 10.35
Patch10: pcre2-10.35-Fixed-a-word-boundary-check-bug-in-JIT-when-partial-.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: coreutils BuildRequires: coreutils
@ -178,6 +181,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%patch7 -p1 %patch7 -p1
%patch8 -p1 %patch8 -p1
%patch9 -p1 %patch9 -p1
%patch10 -p1
# Because of multilib patch # Because of multilib patch
libtoolize --copy --force libtoolize --copy --force
autoreconf -vif autoreconf -vif
@ -291,6 +295,9 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.* %{_mandir}/man1/pcre2test.*
%changelog %changelog
* Tue Oct 27 2020 Petr Pisar <ppisar@redhat.com> - 10.35-8
- Fix the word boundary check in JIT mode when partial matching is enabled
  (upstream bug #2663)
* Mon Sep 21 2020 Petr Pisar <ppisar@redhat.com> - 10.35-7 * Mon Sep 21 2020 Petr Pisar <ppisar@redhat.com> - 10.35-7
- Fix matching a character set when JIT is enabled and both Unicode script and - Fix matching a character set when JIT is enabled and both Unicode script and
Unicode class are present (upstream bug #2644) Unicode class are present (upstream bug #2644)