diff --git a/.gitignore b/.gitignore index d8061dc..0f11108 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ /pcre2-10.33.tar.bz2.sig /pcre2-10.34-RC1.tar.bz2 /pcre2-10.34-RC1.tar.bz2.sig +/pcre2-10.34-RC2.tar.bz2 +/pcre2-10.34-RC2.tar.bz2.sig diff --git a/pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch b/pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch new file mode 100644 index 0000000..f0bd6b5 --- /dev/null +++ b/pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch @@ -0,0 +1,246 @@ +Index: src/pcre2_jit_neon_inc.h +=================================================================== +--- src/pcre2_jit_neon_inc.h (revision 1183) ++++ src/pcre2_jit_neon_inc.h (working copy) +@@ -112,14 +112,14 @@ + vect_t cmp1a, cmp1b, cmp2a, cmp2b; + const sljit_u32 diff = IN_UCHARS(offs1 - offs2); + PCRE2_UCHAR char1a = ic.c.c1; +-PCRE2_UCHAR char1b = ic.c.c2; + PCRE2_UCHAR char2a = ic.c.c3; +-PCRE2_UCHAR char2b = ic.c.c4; + + # ifdef FFCPS_CHAR1A2A + cmp1a = VDUPQ(char1a); + cmp2a = VDUPQ(char2a); + # else ++PCRE2_UCHAR char1b = ic.c.c2; ++PCRE2_UCHAR char2b = ic.c.c4; + if (char1a == char1b) + cmp1a = VDUPQ(char1a); + else +@@ -159,10 +159,17 @@ + } + # endif + +-str_ptr += offs1; ++str_ptr += IN_UCHARS(offs1); + #endif + ++#if PCRE2_CODE_UNIT_WIDTH != 8 ++vect_t char_mask = VDUPQ(0xff); ++#endif ++ ++#if defined(FF_UTF) + restart:; ++#endif ++ + #if defined(FFCPS) + sljit_u8 *p1 = str_ptr - diff; + #endif +@@ -169,7 +176,10 @@ + sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf); + str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf); + vect_t data = VLD1Q(str_ptr); +- ++#if PCRE2_CODE_UNIT_WIDTH != 8 ++data = VANDQ(data, char_mask); ++#endif ++ + #if defined(FFCS) + vect_t eq = VCEQQ(data, vc1); + +@@ -186,7 +196,17 @@ + # if defined(FFCPS_DIFF1) + vect_t prev_data = data; + # endif +-vect_t data2 = VLD1Q(str_ptr - diff); ++ ++vect_t data2; ++if (p1 < str_ptr) ++ { ++ data2 = VLD1Q(str_ptr - diff); ++#if PCRE2_CODE_UNIT_WIDTH != 8 ++ data2 = VANDQ(data2, char_mask); ++#endif ++ } ++else ++ data2 = shift_left_n_lanes(data, offs1 - offs2); + + data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b); + data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b); +@@ -223,6 +243,9 @@ + while (str_ptr < str_end) + { + vect_t orig_data = VLD1Q(str_ptr); ++#if PCRE2_CODE_UNIT_WIDTH != 8 ++ orig_data = VANDQ(orig_data, char_mask); ++#endif + data = orig_data; + + #if defined(FFCS) +@@ -240,9 +263,12 @@ + + #if defined(FFCPS) + # if defined (FFCPS_DIFF1) +- data2 = VEXTQ(prev_data, data, 15); ++ data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1); + # else + data2 = VLD1Q(str_ptr - diff); ++# if PCRE2_CODE_UNIT_WIDTH != 8 ++ data2 = VANDQ(data2, char_mask); ++# endif + # endif + + # ifdef FFCPS_CHAR1A2A +Index: src/pcre2_jit_simd_inc.h +=================================================================== +--- src/pcre2_jit_simd_inc.h (revision 1183) ++++ src/pcre2_jit_simd_inc.h (working copy) +@@ -655,8 +655,9 @@ + #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ + + #if PCRE2_CODE_UNIT_WIDTH == 8 ++# define VECTOR_FACTOR 16 + # define vect_t uint8x16_t +-# define VLD1Q vld1q_u8 ++# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X)) + # define VCEQQ vceqq_u8 + # define VORRQ vorrq_u8 + # define VST1Q vst1q_u8 +@@ -668,8 +669,9 @@ + uint64_t dw[2]; + } quad_word; + #elif PCRE2_CODE_UNIT_WIDTH == 16 ++# define VECTOR_FACTOR 8 + # define vect_t uint16x8_t +-# define VLD1Q vld1q_u16 ++# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X)) + # define VCEQQ vceqq_u16 + # define VORRQ vorrq_u16 + # define VST1Q vst1q_u16 +@@ -681,8 +683,9 @@ + uint64_t dw[2]; + } quad_word; + #else ++# define VECTOR_FACTOR 4 + # define vect_t uint32x4_t +-# define VLD1Q vld1q_u32 ++# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X)) + # define VCEQQ vceqq_u32 + # define VORRQ vorrq_u32 + # define VST1Q vst1q_u32 +@@ -697,23 +700,29 @@ + + #define FFCS + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCS +-#undef FF_UTF + + #define FFCS_2 + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" +-#undef FF_UTF ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCS_2 + + #define FFCS_MASK + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" +-#undef FF_UTF ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCS_MASK + + #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 +@@ -842,6 +851,29 @@ + #endif + } + ++/* ARM doesn't have a shift left across lanes. */ ++static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) ++{ ++vect_t zero = VDUPQ(0); ++SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); ++/* VEXTQ takes an immediate as last argument. */ ++#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); ++switch (n) ++ { ++ C(1); C(2); C(3); ++#if PCRE2_CODE_UNIT_WIDTH != 32 ++ C(4); C(5); C(6); C(7); ++# if PCRE2_CODE_UNIT_WIDTH != 16 ++ C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); ++# endif ++#endif ++ default: ++ /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't ++ happen. The return is still here for compilers to not warn. */ ++ return a; ++ } ++} ++ + #define FFCPS + #define FFCPS_DIFF1 + #define FFCPS_CHAR1A2A +@@ -848,9 +880,11 @@ + + #define FFCPS_0 + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" +-#undef FF_UTF ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCPS_0 + + #undef FFCPS_CHAR1A2A +@@ -857,9 +891,11 @@ + + #define FFCPS_1 + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" +-#undef FF_UTF ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCPS_1 + + #undef FFCPS_DIFF1 +@@ -866,9 +902,11 @@ + + #define FFCPS_DEFAULT + #include "pcre2_jit_neon_inc.h" +-#define FF_UTF +-#include "pcre2_jit_neon_inc.h" +-#undef FF_UTF ++#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 ++# define FF_UTF ++# include "pcre2_jit_neon_inc.h" ++# undef FF_UTF ++#endif + #undef FFCPS + + #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 diff --git a/pcre2.spec b/pcre2.spec index ce809dc..c765589 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -6,7 +6,7 @@ %bcond_with pcre2_enables_sealloc # This is stable release: -%global rcversion RC1 +%global rcversion RC2 Name: pcre2 Version: 10.34 Release: %{?rcversion:0.}1%{?rcversion:.%rcversion}%{?dist} @@ -51,6 +51,10 @@ Source1: https://ftp.pcre.org/pub/pcre/%{?rcversion:Testing/}%{name}-%{myvers Source2: https://ftp.pcre.org/pub/pcre/Public-Key # Do no set RPATH if libdir is not /usr/lib Patch0: pcre2-10.10-Fix-multilib.patch +# Fix an infinite loop in 64-bit ARM JIT with NEON instructions, +# proposed to upstream +# +Patch1: pcre2-10.34-RC2-fix_a_loop_in_neon_arm64_jit.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -129,6 +133,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %{gpgverify} --keyring='%{SOURCE2}' --signature='%{SOURCE1}' --data='%{SOURCE0}' %setup -q -n %{name}-%{myversion} %patch0 -p1 +%patch1 -p0 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -226,6 +231,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Thu Nov 07 2019 Petr Pisar - 10.34-0.1.RC2 +- 10.34-RC2 bump +- Fix an infinite loop in 64-bit ARM JIT with NEON instructions + * Wed Oct 30 2019 Petr Pisar - 10.34-0.1.RC1 - 10.34-RC1 bump diff --git a/sources b/sources index fb3c324..660e112 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ -SHA512 (pcre2-10.34-RC1.tar.bz2) = 9421ff823e13cdc9598819c3d4777a648f8d3ce3d13eaa9c5e64df676adf9c77fbaf439784138d1e265edaabe71ab863d1fc84e1fac7a9cfa3a4f7b867b6a47a -SHA512 (pcre2-10.34-RC1.tar.bz2.sig) = 04f6c1b20b378ba831f73a6a19439fef3b763144a3fa4864bfdd29b6380a6ab676006044fe19195feba59a4047420e2ce3dce354211f441a4d0abd2c50ee100a +SHA512 (pcre2-10.34-RC2.tar.bz2) = 82567bd159a6c0473166904648aed0e4718bedc4ad0f75fbc9c2b3b6350b50359dcc7c74d5ecb93f4acca043fa38c89c2296f4ab2672cddf913153532d5f382a +SHA512 (pcre2-10.34-RC2.tar.bz2.sig) = b9f55286a240b2118a0040e7edd58d44950fd1adab8acd077d481c8cfb042b430482165fac27a960ac10a051e3af3cb46dd5261a95a39bd888a0135ddcd9560d