Fix a crash when finding a Unicode property for a character with a code point greater than 0x10ffff

2017-02-27 15:46:55 +01:00 · 2017-02-27 15:46:55 +01:00 · 3e683888a4
commit 3e683888a4
parent c9c5588a71
2 changed files with 132 additions and 1 deletions
--- a/pcre2-10.23-Fix-32-bit-non-UTF-property-test-crash.patch
+++ b/pcre2-10.23-Fix-32-bit-non-UTF-property-test-crash.patch
@ -0,0 +1,123 @@
+From be14c2f3f01dc2b6b574b3a86536ee8194945f7f Mon Sep 17 00:00:00 2001
+From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
+Date: Fri, 24 Feb 2017 18:25:32 +0000
+Subject: [PATCH] Fix 32-bit non-UTF property test crash.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@670 6239d852-aaf2-0410-a92c-79f79f948069
+
+Petr Písař: Ported to 10.23.
+
+diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
+index 6a8774c..720bbc9 100644
+--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
+@@ -1774,10 +1774,17 @@ typedef struct {
+ /* UCD access macros */
+ 
+ #define UCD_BLOCK_SIZE 128
+-#define GET_UCD(ch) (PRIV(ucd_records) + \
+#define REAL_GET_UCD(ch) (PRIV(ucd_records) + \
+         PRIV(ucd_stage2)[PRIV(ucd_stage1)[(int)(ch) / UCD_BLOCK_SIZE] * \
+         UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])
+ 
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define GET_UCD(ch) ((ch > MAX_UTF_CODE_POINT)? \
+  PRIV(dummy_ucd_record) : REAL_GET_UCD(ch))
+#else
+#define GET_UCD(ch) REAL_GET_UCD(ch)
+#endif
+
+ #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
+ #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
+ #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
+@@ -1834,6 +1841,9 @@ extern const uint8_t          PRIV(utf8_table4)[];
+ #define _pcre2_default_compile_context PCRE2_SUFFIX(_pcre2_default_compile_context_)
+ #define _pcre2_default_match_context   PCRE2_SUFFIX(_pcre2_default_match_context_)
+ #define _pcre2_default_tables          PCRE2_SUFFIX(_pcre2_default_tables_)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+#define _pcre2_dummy_ucd_record        PCRE2_SUFFIX(_pcre2_dummy_ucd_record_)
+#endif
+ #define _pcre2_hspace_list             PCRE2_SUFFIX(_pcre2_hspace_list_)
+ #define _pcre2_vspace_list             PCRE2_SUFFIX(_pcre2_vspace_list_)
+ #define _pcre2_ucd_caseless_sets       PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
+@@ -1858,6 +1868,9 @@ extern const uint32_t                  PRIV(hspace_list)[];
+ extern const uint32_t                  PRIV(vspace_list)[];
+ extern const uint32_t                  PRIV(ucd_caseless_sets)[];
+ extern const ucd_record                PRIV(ucd_records)[];
+#if PCRE2_CODE_UNIT_WIDTH == 32
+extern const ucd_record                PRIV(dummy_ucd_record)[];
+#endif
+ extern const uint8_t                   PRIV(ucd_stage1)[];
+ extern const uint16_t                  PRIV(ucd_stage2)[];
+ extern const uint32_t                  PRIV(ucp_gbtable)[];
+diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c
+index 116f537..56aa29d 100644
+--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
+@@ -41,6 +41,20 @@ const uint32_t PRIV(ucd_caseless_sets)[] = {0};
+ 
+ const char *PRIV(unicode_version) = "8.0.0";
+ 
+/* If the 32-bit library is run in non-32-bit mode, character values
+greater than 0x10ffff may be encountered. For these we set up a
+special record. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+const ucd_record PRIV(dummy_ucd_record)[] = {{
+  ucp_Common,    /* script */
+  ucp_Cn,        /* type unassigned */
+  ucp_gbOther,   /* grapheme break property */
+  0,             /* case set */
+  0,             /* other case */
+  }};
+#endif
+
+ /* When recompiling tables with a new Unicode version, please check the
+ types in this structure definition from pcre2_internal.h (the actual
+ field names will be different):
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index c3b2bfc..decfe82 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -360,4 +360,7 @@
+ 
+ /[\s[:^ascii:]]/B,ucp
+ 
+/\pP/ucp
+    \x{7fffffff}\=no_jit
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index 3b5a0cd..41e0a48 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1415,4 +1415,10 @@ No match
+         End
+ ------------------------------------------------------------------
+ 
+/\pP/ucp
+    \x{7fffffff}\=no_jit
+** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index 1496159..e9130b9 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1409,4 +1409,8 @@ No match
+         End
+ ------------------------------------------------------------------
+ 
+/\pP/ucp
+    \x{7fffffff}\=no_jit
+No match
+
+ # End of testinput12
+-- 
+2.7.4
+
--- a/pcre2.spec
+++ b/pcre2.spec
@ -39,6 +39,10 @@ Patch0:     pcre2-10.10-Fix-multilib.patch
 # Handle memmory allocation failures in pcre2test tool, in upstream after 10.23
 Patch1:     pcre2-10.23-Check-malloc-returns-in-pcre2test.patch
 # Fix a compiler warning, proposed to upstream but not portable before ISO C99
+# Fix a crash when finding a Unicode property for a character with a code
+# point greater than 0x10ffff in UTF-32 library while UTF mode is disabled,
+# upstream bug #2052, in upstream after 10.23
+Patch2:     pcre2-10.23-Fix-32-bit-non-UTF-property-test-crash.patch
 BuildRequires:  autoconf
 BuildRequires:  automake
 BuildRequires:  coreutils
@ -116,6 +120,7 @@ Utilities demonstrating PCRE2 capabilities like pcre2grep or pcre2test.
 %setup -q -n %{name}-%{myversion}
 %patch0 -p1
 %patch1 -p1
+%patch2 -p1
 # Because of multilib patch
 libtoolize --copy --force
 autoreconf -vif
@ -214,8 +219,11 @@ make %{?_smp_mflags} check VERBOSE=yes
 %{_mandir}/man1/pcre2test.*

 %changelog
-* Fri Feb 24 2017 Petr Pisar <ppisar@redhat.com> - 10.23-2
+* Mon Feb 27 2017 Petr Pisar <ppisar@redhat.com> - 10.23-2
 - Handle memmory allocation failures in pcre2test tool
+- Fix a crash when finding a Unicode property for a character with a code
+  point greater than 0x10ffff in UTF-32 library while UTF mode is disabled
+  (upstream bug #2052)

 * Tue Feb 14 2017 Petr Pisar <ppisar@redhat.com> - 10.23-1
 - 10.23 bump