Fix computing an offset for the start of the UTF-16 error when a high surrogate is not followed by a valid low surrogate
This commit is contained in:
parent
00b42ec03c
commit
0ea51b6558
@ -0,0 +1,122 @@
|
|||||||
|
From 9af350af12899021537ce50c25ba98bdd7c1e5ee Mon Sep 17 00:00:00 2001
|
||||||
|
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
|
||||||
|
Date: Mon, 24 Feb 2020 15:39:56 +0000
|
||||||
|
Subject: [PATCH] Fix bug in UTF-16 checker returning wrong offset for missing
|
||||||
|
low surrogate.
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1226 6239d852-aaf2-0410-a92c-79f79f948069
|
||||||
|
Petr Písař: Ported to 10.34.
|
||||||
|
---
|
||||||
|
src/pcre2_valid_utf.c | 4 ++--
|
||||||
|
testdata/testinput12 | 6 ++++++
|
||||||
|
testdata/testoutput12-16 | 11 ++++++++++-
|
||||||
|
testdata/testoutput12-32 | 9 +++++++++
|
||||||
|
testdata/testoutput14-16 | 2 +-
|
||||||
|
|
||||||
|
diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c
|
||||||
|
index 96e8bff..e47ea78 100644
|
||||||
|
--- a/src/pcre2_valid_utf.c
|
||||||
|
+++ b/src/pcre2_valid_utf.c
|
||||||
|
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
|
Written by Philip Hazel
|
||||||
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
|
- New API code Copyright (c) 2016-2017 University of Cambridge
|
||||||
|
+ New API code Copyright (c) 2016-2020 University of Cambridge
|
||||||
|
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@@ -347,7 +347,7 @@ for (p = string; length > 0; p++)
|
||||||
|
length--;
|
||||||
|
if ((*p & 0xfc00) != 0xdc00)
|
||||||
|
{
|
||||||
|
- *erroroffset = p - string;
|
||||||
|
+ *erroroffset = p - string - 1;
|
||||||
|
return PCRE2_ERROR_UTF16_ERR2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||||
|
index 32e97b5..beaf643 100644
|
||||||
|
--- a/testdata/testinput12
|
||||||
|
+++ b/testdata/testinput12
|
||||||
|
@@ -444,6 +444,12 @@
|
||||||
|
\= Expect no match
|
||||||
|
A\x{d800}B
|
||||||
|
A\x{110000}B
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ aa\x{d800}aa
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ \x{d800}aa
|
||||||
|
|
||||||
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||||
|
index b944311..6e545c3 100644
|
||||||
|
--- a/testdata/testoutput12-16
|
||||||
|
+++ b/testdata/testoutput12-16
|
||||||
|
@@ -533,7 +533,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
|
XX\x{110000}
|
||||||
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
||||||
|
+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
|
||||||
|
\= Expect no match
|
||||||
|
XX\x{d800}\=offset=3
|
||||||
|
No match
|
||||||
|
@@ -1576,6 +1576,15 @@ No match
|
||||||
|
No match
|
||||||
|
A\x{110000}B
|
||||||
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ aa\x{d800}aa
|
||||||
|
+ 0: aa
|
||||||
|
+ 0: aa
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ \x{d800}aa
|
||||||
|
+ 0: aa
|
||||||
|
|
||||||
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||||
|
index 74ccac8..1a0783a 100644
|
||||||
|
--- a/testdata/testoutput12-32
|
||||||
|
+++ b/testdata/testoutput12-32
|
||||||
|
@@ -1574,6 +1574,15 @@ No match
|
||||||
|
No match
|
||||||
|
A\x{110000}B
|
||||||
|
No match
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ aa\x{d800}aa
|
||||||
|
+ 0: aa
|
||||||
|
+ 0: aa
|
||||||
|
+
|
||||||
|
+/aa/utf,ucp,match_invalid_utf,global
|
||||||
|
+ \x{d800}aa
|
||||||
|
+ 0: aa
|
||||||
|
|
||||||
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
|
||||||
|
index 2d58f1c..61541f6 100644
|
||||||
|
--- a/testdata/testoutput14-16
|
||||||
|
+++ b/testdata/testoutput14-16
|
||||||
|
@@ -33,7 +33,7 @@ Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||||||
|
XX\x{110000}
|
||||||
|
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||||||
|
XX\x{d800}\x{1234}
|
||||||
|
-Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
||||||
|
+Failed: error -25: UTF-16 error: invalid low surrogate at offset 2
|
||||||
|
|
||||||
|
/badutf/utf
|
||||||
|
X\xdf
|
||||||
|
--
|
||||||
|
2.21.1
|
||||||
|
|
11
pcre2.spec
11
pcre2.spec
@ -9,7 +9,7 @@
|
|||||||
#%%global rcversion RC1
|
#%%global rcversion RC1
|
||||||
Name: pcre2
|
Name: pcre2
|
||||||
Version: 10.34
|
Version: 10.34
|
||||||
Release: %{?rcversion:0.}7%{?rcversion:.%rcversion}%{?dist}
|
Release: %{?rcversion:0.}8%{?rcversion:.%rcversion}%{?dist}
|
||||||
%global myversion %{version}%{?rcversion:-%rcversion}
|
%global myversion %{version}%{?rcversion:-%rcversion}
|
||||||
Summary: Perl-compatible regular expression library
|
Summary: Perl-compatible regular expression library
|
||||||
# the library: BSD with exceptions
|
# the library: BSD with exceptions
|
||||||
@ -77,6 +77,10 @@ Patch8: pcre2-10.34-Fix-control-verb-chain-restoration-issue-in-JIT.patch
|
|||||||
# Fix a crash in JIT when an invalid UTF-8 character is encountered in
|
# Fix a crash in JIT when an invalid UTF-8 character is encountered in
|
||||||
# match_invalid_utf mode, upstream bug #2529, in upstream after 10.34
|
# match_invalid_utf mode, upstream bug #2529, in upstream after 10.34
|
||||||
Patch9: pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch
|
Patch9: pcre2-10.34-Fix-a-crash-which-occurs-when-the-character-type-of-.patch
|
||||||
|
# Fix computing an offset for the start of the UTF-16 error when a high surrogate
|
||||||
|
# is not followed by a valid low surrogate, upstream bug #2527,
|
||||||
|
# in upstream after 10.34
|
||||||
|
Patch10: pcre2-10.34-Fix-bug-in-UTF-16-checker-returning-wrong-offset-for.patch
|
||||||
BuildRequires: autoconf
|
BuildRequires: autoconf
|
||||||
BuildRequires: automake
|
BuildRequires: automake
|
||||||
BuildRequires: coreutils
|
BuildRequires: coreutils
|
||||||
@ -164,6 +168,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
|
|||||||
%patch7 -p1
|
%patch7 -p1
|
||||||
%patch8 -p1
|
%patch8 -p1
|
||||||
%patch9 -p1
|
%patch9 -p1
|
||||||
|
%patch10 -p1
|
||||||
# Because of multilib patch
|
# Because of multilib patch
|
||||||
libtoolize --copy --force
|
libtoolize --copy --force
|
||||||
autoreconf -vif
|
autoreconf -vif
|
||||||
@ -261,6 +266,10 @@ make %{?_smp_mflags} check VERBOSE=yes
|
|||||||
%{_mandir}/man1/pcre2test.*
|
%{_mandir}/man1/pcre2test.*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Mar 16 2020 Petr Pisar <ppisar@redhat.com> - 10.34-8
|
||||||
|
- Fix computing an offset for the start of the UTF-16 error when a high
|
||||||
|
surrogate is not followed by a valid low surrogate (upstream bug #2527)
|
||||||
|
|
||||||
* Thu Feb 20 2020 Petr Pisar <ppisar@redhat.com> - 10.34-7
|
* Thu Feb 20 2020 Petr Pisar <ppisar@redhat.com> - 10.34-7
|
||||||
- Fix a crash in JIT when an invalid UTF-8 character is encountered in
|
- Fix a crash in JIT when an invalid UTF-8 character is encountered in
|
||||||
match_invalid_utf mode (upstream bug #2529)
|
match_invalid_utf mode (upstream bug #2529)
|
||||||
|
Loading…
Reference in New Issue
Block a user