10.34 bump

This commit is contained in:
Petr Písař 2019-11-22 13:27:49 +01:00
parent 1ee52c43e8
commit a392626e04
5 changed files with 9 additions and 340 deletions

2
.gitignore vendored
View File

@ -20,3 +20,5 @@
/pcre2-10.34-RC1.tar.bz2.sig /pcre2-10.34-RC1.tar.bz2.sig
/pcre2-10.34-RC2.tar.bz2 /pcre2-10.34-RC2.tar.bz2
/pcre2-10.34-RC2.tar.bz2.sig /pcre2-10.34-RC2.tar.bz2.sig
/pcre2-10.34.tar.bz2
/pcre2-10.34.tar.bz2.sig

View File

@ -1,81 +0,0 @@
From 3c7295bb56a7944fe5358cb2eab2ad68d35a3aa1 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 16 Nov 2019 17:30:07 +0000
Subject: [PATCH] Fix sometimes failing caseless non-ASCII matching in
assertion.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1185 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.34-RC2.
---
src/pcre2_compile.c | 13 +++++++++++++
testdata/testinput4 | 8 ++++++++
testdata/testoutput4 | 12 ++++++++++++
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 3204973..800b61b 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -8741,6 +8741,19 @@ do {
case OP_MINPLUSI:
case OP_POSPLUSI:
if (inassert == 0) return 0;
+
+ /* If the character is more than one code unit long, we cannot set its
+ first code unit when matching caselessly. Later scanning may pick up
+ multiple code units. */
+
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ if (scode[1] >= 0x80) return 0;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0;
+#endif
+#endif
+
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
diff --git a/testdata/testinput4 b/testdata/testinput4
index f3d498c..0871835 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2483,4 +2483,12 @@
/\X*/
\xF3aaa\xE4\xEA\xEB\xFEa
+/Я/i,utf
+ \x{42f}
+ \x{44f}
+
+/(?=Я)/i,utf
+ \x{42f}
+ \x{44f}
+
# End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 53926ed..2c8037b 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -4016,4 +4016,16 @@ No match
\xF3aaa\xE4\xEA\xEB\xFEa
0: \xf3aaa\xe4\xea\xeb\xfea
+/Я/i,utf
+ \x{42f}
+ 0: \x{42f}
+ \x{44f}
+ 0: \x{44f}
+
+/(?=Я)/i,utf
+ \x{42f}
+ 0:
+ \x{44f}
+ 0:
+
# End of testinput4
--
2.21.0

View File

@ -1,246 +0,0 @@
Index: src/pcre2_jit_neon_inc.h
===================================================================
--- src/pcre2_jit_neon_inc.h (revision 1183)
+++ src/pcre2_jit_neon_inc.h (working copy)
@@ -112,14 +112,14 @@
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
PCRE2_UCHAR char1a = ic.c.c1;
-PCRE2_UCHAR char1b = ic.c.c2;
PCRE2_UCHAR char2a = ic.c.c3;
-PCRE2_UCHAR char2b = ic.c.c4;
# ifdef FFCPS_CHAR1A2A
cmp1a = VDUPQ(char1a);
cmp2a = VDUPQ(char2a);
# else
+PCRE2_UCHAR char1b = ic.c.c2;
+PCRE2_UCHAR char2b = ic.c.c4;
if (char1a == char1b)
cmp1a = VDUPQ(char1a);
else
@@ -159,10 +159,17 @@
}
# endif
-str_ptr += offs1;
+str_ptr += IN_UCHARS(offs1);
#endif
+#if PCRE2_CODE_UNIT_WIDTH != 8
+vect_t char_mask = VDUPQ(0xff);
+#endif
+
+#if defined(FF_UTF)
restart:;
+#endif
+
#if defined(FFCPS)
sljit_u8 *p1 = str_ptr - diff;
#endif
@@ -169,7 +176,10 @@
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
vect_t data = VLD1Q(str_ptr);
-
+#if PCRE2_CODE_UNIT_WIDTH != 8
+data = VANDQ(data, char_mask);
+#endif
+
#if defined(FFCS)
vect_t eq = VCEQQ(data, vc1);
@@ -186,7 +196,17 @@
# if defined(FFCPS_DIFF1)
vect_t prev_data = data;
# endif
-vect_t data2 = VLD1Q(str_ptr - diff);
+
+vect_t data2;
+if (p1 < str_ptr)
+ {
+ data2 = VLD1Q(str_ptr - diff);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ data2 = VANDQ(data2, char_mask);
+#endif
+ }
+else
+ data2 = shift_left_n_lanes(data, offs1 - offs2);
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
@@ -223,6 +243,9 @@
while (str_ptr < str_end)
{
vect_t orig_data = VLD1Q(str_ptr);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+ orig_data = VANDQ(orig_data, char_mask);
+#endif
data = orig_data;
#if defined(FFCS)
@@ -240,9 +263,12 @@
#if defined(FFCPS)
# if defined (FFCPS_DIFF1)
- data2 = VEXTQ(prev_data, data, 15);
+ data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
# else
data2 = VLD1Q(str_ptr - diff);
+# if PCRE2_CODE_UNIT_WIDTH != 8
+ data2 = VANDQ(data2, char_mask);
+# endif
# endif
# ifdef FFCPS_CHAR1A2A
Index: src/pcre2_jit_simd_inc.h
===================================================================
--- src/pcre2_jit_simd_inc.h (revision 1183)
+++ src/pcre2_jit_simd_inc.h (working copy)
@@ -655,8 +655,9 @@
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
#if PCRE2_CODE_UNIT_WIDTH == 8
+# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
-# define VLD1Q vld1q_u8
+# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
@@ -668,8 +669,9 @@
uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
+# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
-# define VLD1Q vld1q_u16
+# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
@@ -681,8 +683,9 @@
uint64_t dw[2];
} quad_word;
#else
+# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
-# define VLD1Q vld1q_u32
+# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
@@ -697,23 +700,29 @@
#define FFCS
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCS
-#undef FF_UTF
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
-#undef FF_UTF
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCS_2
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
-#undef FF_UTF
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCS_MASK
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
@@ -842,6 +851,29 @@
#endif
}
+/* ARM doesn't have a shift left across lanes. */
+static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
+{
+vect_t zero = VDUPQ(0);
+SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
+/* VEXTQ takes an immediate as last argument. */
+#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
+switch (n)
+ {
+ C(1); C(2); C(3);
+#if PCRE2_CODE_UNIT_WIDTH != 32
+ C(4); C(5); C(6); C(7);
+# if PCRE2_CODE_UNIT_WIDTH != 16
+ C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
+# endif
+#endif
+ default:
+ /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
+ happen. The return is still here for compilers to not warn. */
+ return a;
+ }
+}
+
#define FFCPS
#define FFCPS_DIFF1
#define FFCPS_CHAR1A2A
@@ -848,9 +880,11 @@
#define FFCPS_0
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
-#undef FF_UTF
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCPS_0
#undef FFCPS_CHAR1A2A
@@ -857,9 +891,11 @@
#define FFCPS_1
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
-#undef FF_UTF
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCPS_1
#undef FFCPS_DIFF1
@@ -866,9 +902,11 @@
#define FFCPS_DEFAULT
#include "pcre2_jit_neon_inc.h"
-#define FF_UTF
-#include "pcre2_jit_neon_inc.h"
-#undef FF_UTF
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
#undef FFCPS
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1

View File

@ -6,10 +6,10 @@
%bcond_with pcre2_enables_sealloc %bcond_with pcre2_enables_sealloc
# This is a stable release: # This is a stable release:
%global rcversion RC2 #%%global rcversion RC1
Name: pcre2 Name: pcre2
Version: 10.34 Version: 10.34
Release: %{?rcversion:0.}2%{?rcversion:.%rcversion}%{?dist} Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist}
%global myversion %{version}%{?rcversion:-%rcversion} %global myversion %{version}%{?rcversion:-%rcversion}
Summary: Perl-compatible regular expression library Summary: Perl-compatible regular expression library
# the library: BSD with exceptions # the library: BSD with exceptions
@ -51,13 +51,6 @@ Source1: https://ftp.pcre.org/pub/pcre/%{?rcversion:Testing/}%{name}-%{myvers
Source2: https://ftp.pcre.org/pub/pcre/Public-Key Source2: https://ftp.pcre.org/pub/pcre/Public-Key
# Do not set RPATH if libdir is not /usr/lib # Do not set RPATH if libdir is not /usr/lib
Patch0: pcre2-10.10-Fix-multilib.patch Patch0: pcre2-10.10-Fix-multilib.patch
# Fix an infinite loop in 64-bit ARM JIT with NEON instructions,
# in upstream after 10.34-RC2
# <https://lists.exim.org/lurker/message/20191111.150436.ac8d8581.en.html>
Patch1: pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch
# Fix optimized caseless matching of non-ASCII characters in assertions,
# upstream bug #2466, in upstream after 10.34-RC2
Patch2: pcre2-10.34-RC2-Fix-sometimes-failing-caseless-non-ASCII-matching-in.patch
BuildRequires: autoconf BuildRequires: autoconf
BuildRequires: automake BuildRequires: automake
BuildRequires: coreutils BuildRequires: coreutils
@ -136,8 +129,6 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
%{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}' %{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}'
%setup -q -n %{name}-%{myversion} %setup -q -n %{name}-%{myversion}
%patch0 -p1 %patch0 -p1
%patch1 -p0
%patch2 -p1
# Because of multilib patch # Because of multilib patch
libtoolize --copy --force libtoolize --copy --force
autoreconf -vif autoreconf -vif
@ -235,6 +226,9 @@ make %{?_smp_mflags} check VERBOSE=yes
%{_mandir}/man1/pcre2test.* %{_mandir}/man1/pcre2test.*
%changelog %changelog
* Fri Nov 22 2019 Petr Pisar <ppisar@redhat.com> - 10.34-1
- 10.34 bump
* Mon Nov 18 2019 Petr Pisar <ppisar@redhat.com> - 10.34-0.2.RC2 * Mon Nov 18 2019 Petr Pisar <ppisar@redhat.com> - 10.34-0.2.RC2
- Fix optimized caseless matching of non-ASCII characters in assertions - Fix optimized caseless matching of non-ASCII characters in assertions
(upstream bug #2466) (upstream bug #2466)

View File

@ -1,2 +1,2 @@
SHA512 (pcre2-10.34-RC2.tar.bz2) = 82567bd159a6c0473166904648aed0e4718bedc4ad0f75fbc9c2b3b6350b50359dcc7c74d5ecb93f4acca043fa38c89c2296f4ab2672cddf913153532d5f382a SHA512 (pcre2-10.34.tar.bz2) = 77ad75f8b0b8bbfc2f57932596151bca25b06bd621e0f047e476f38cd127f43e2052460b95c281a7e874aad2b7fd86c8f3413f4a323abb74b9440a42d0ee9524
SHA512 (pcre2-10.34-RC2.tar.bz2.sig) = b9f55286a240b2118a0040e7edd58d44950fd1adab8acd077d481c8cfb042b430482165fac27a960ac10a051e3af3cb46dd5261a95a39bd888a0135ddcd9560d SHA512 (pcre2-10.34.tar.bz2.sig) = f4cb8dcbe4ef254a47ccd76e3a62097fb6ee2b6278053d08fb87d4e2e21f788cc84bc54587e41d236b73fb1403816ba1576ec10545d2abdffac5a523d97fd71c