diff --git a/pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch b/pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch new file mode 100644 index 0000000..5c0dbd8 --- /dev/null +++ b/pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch @@ -0,0 +1,122 @@ +From 9af350af12899021537ce50c25ba98bdd7c1e5ee Mon Sep 17 00:00:00 2001 +From: ph10 +Date: Mon, 24 Feb 2020 15:39:56 +0000 +Subject: [PATCH] Fix bug in UTF-16 checker returning wrong offset for missing + low surrogate. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1226 6239d852-aaf2-0410-a92c-79f79f948069 +Petr Písař: Ported to 10.34. +--- + src/pcre2_valid_utf.c | 4 ++-- + testdata/testinput12 | 6 ++++++ + testdata/testoutput12-16 | 11 ++++++++++- + testdata/testoutput12-32 | 9 +++++++++ + testdata/testoutput14-16 | 2 +- + +diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c +index 96e8bff..e47ea78 100644 +--- a/src/pcre2_valid_utf.c ++++ b/src/pcre2_valid_utf.c +@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge +- New API code Copyright (c) 2016-2017 University of Cambridge ++ New API code Copyright (c) 2016-2020 University of Cambridge + + ----------------------------------------------------------------------------- + Redistribution and use in source and binary forms, with or without +@@ -347,7 +347,7 @@ for (p = string; length > 0; p++) + length--; + if ((*p & 0xfc00) != 0xdc00) + { +- *erroroffset = p - string; ++ *erroroffset = p - string - 1; + return PCRE2_ERROR_UTF16_ERR2; + } + } +diff --git a/testdata/testinput12 b/testdata/testinput12 +index 32e97b5..beaf643 100644 +--- a/testdata/testinput12 ++++ b/testdata/testinput12 +@@ -444,6 +444,12 @@ + \= Expect no match + A\x{d800}B + A\x{110000}B ++ ++/aa/utf,ucp,match_invalid_utf,global ++ aa\x{d800}aa ++ ++/aa/utf,ucp,match_invalid_utf,global ++ \x{d800}aa + + # ---------------------------------------------------- + +diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 +index b944311..6e545c3 100644 +--- a/testdata/testoutput12-16 ++++ b/testdata/testoutput12-16 +@@ -533,7 +533,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{110000} + ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + XX\x{d800}\x{1234} +-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 ++Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 + \= Expect no match + XX\x{d800}\=offset=3 + No match +@@ -1576,6 +1576,15 @@ No match + No match + A\x{110000}B + ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 ++ ++/aa/utf,ucp,match_invalid_utf,global ++ aa\x{d800}aa ++ 0: aa ++ 0: aa ++ ++/aa/utf,ucp,match_invalid_utf,global ++ \x{d800}aa ++ 0: aa + + # ---------------------------------------------------- + +diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 +index 74ccac8..1a0783a 
100644 +--- a/testdata/testoutput12-32 ++++ b/testdata/testoutput12-32 +@@ -1574,6 +1574,15 @@ No match + No match + A\x{110000}B + No match ++ ++/aa/utf,ucp,match_invalid_utf,global ++ aa\x{d800}aa ++ 0: aa ++ 0: aa ++ ++/aa/utf,ucp,match_invalid_utf,global ++ \x{d800}aa ++ 0: aa + + # ---------------------------------------------------- + +diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16 +index 2d58f1c..61541f6 100644 +--- a/testdata/testoutput14-16 ++++ b/testdata/testoutput14-16 +@@ -33,7 +33,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2 + XX\x{110000} + ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 + XX\x{d800}\x{1234} +-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3 ++Failed: error -25: UTF-16 error: invalid low surrogate at offset 2 + + /badutf/utf + X\xdf +-- +2.21.1 + diff --git a/pcre2.spec b/pcre2.spec index 097e227..e0586c9 100644 --- a/pcre2.spec +++ b/pcre2.spec @@ -9,7 +9,7 @@ #%%global rcversion RC1 Name: pcre2 Version: 10.34 -Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist} +Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist} %global myversion %{version}%{?rcversion:-%rcversion} Summary: Perl-compatible regular expression library # the library: BSD with exceptions @@ -77,6 +77,10 @@ Patch8: pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch # Fix a crash in JIT when an invalid UTF-8 character is encountered in # match_invalid_utf mode, upstream bug #2529, in upstream after 10.34 Patch9: pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch +# Fix computing an offset for the start of the UTF-16 error when a high surrogate +# is not followed by a valid low surrogate, upstream bug #2527, +# in upstream after 10.34 +Patch10: pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch BuildRequires: autoconf BuildRequires: automake BuildRequires: coreutils @@ -164,6 +168,7 @@ 
Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test. %patch7 -p1 %patch8 -p1 %patch9 -p1 +%patch10 -p1 # Because of multilib patch libtoolize --copy --force autoreconf -vif @@ -261,6 +266,10 @@ make %{?_smp_mflags} check VERBOSE=yes %{_mandir}/man1/pcre2test.* %changelog +* Mon Mar 16 2020 Petr Pisar - 10.34-8 +- Fix computing an offset for the start of the UTF-16 error when a high + surrogate is not followed by a valid low surrogate (upstream bug #2527) + * Thu Feb 20 2020 Petr Pisar - 10.34-7 - Fix a crash in JIT when an invalid UTF-8 character is encountered in match_invalid_utf mode (upstream bug #2529)