From 84afda9c83c4a52cfd0990313ead2f05c5f61034 Mon Sep 17 00:00:00 2001
From: Jitka Plesnikova <jplesnik@redhat.com>
Date: Mon, 27 Nov 2023 14:04:43 +0100
Subject: [PATCH] Resolves: RHEL-16372

---
 .perl.metadata                   |   1 +
 perl-5.32.1-CVE-2023-47038.patch | 120 +++++++++++++++++++++++++++++++
 perl.spec                        |  13 +++-
 3 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 .perl.metadata
 create mode 100644 perl-5.32.1-CVE-2023-47038.patch

diff --git a/.perl.metadata b/.perl.metadata
new file mode 100644
index 0000000..bc66edd
--- /dev/null
+++ b/.perl.metadata
@@ -0,0 +1 @@
+1fb4f710d139da1e1a3e1fa4eaba201fcaa8e18e perl-5.32.1.tar.xz
diff --git a/perl-5.32.1-CVE-2023-47038.patch b/perl-5.32.1-CVE-2023-47038.patch
new file mode 100644
index 0000000..080c023
--- /dev/null
+++ b/perl-5.32.1-CVE-2023-47038.patch
@@ -0,0 +1,120 @@
+From 12c313ce49b36160a7ca2e9b07ad5bd92ee4a010 Mon Sep 17 00:00:00 2001
+From: Karl Williamson <khw@cpan.org>
+Date: Sat, 9 Sep 2023 11:59:09 -0600
+Subject: [PATCH 1/2] Fix read/write past buffer end: perl-security#140
+
+A package name may be specified in a \p{...} regular expression
+construct.  If unspecified, "utf8::" is assumed, which is the package
+all official Unicode properties are in.  By specifying a different
+package, one can create a user-defined property with the same
+unqualified name as a Unicode one.  Such a property is defined by a sub
+whose name begins with "Is" or "In", and if the sub wishes to refer to
+an official Unicode property, it must explicitly specify the "utf8::".
+S_parse_uniprop_string() is used to parse the interior of both \p{} and
+the user-defined sub lines.
+
+In S_parse_uniprop_string(), it parses the input "name" parameter,
+creating a modified copy, "lookup_name", malloc'ed with the same size as
+"name".  The modifications are essentially to create a canonicalized
+version of the input, with such things as extraneous white-space
+stripped off.  I found it convenient to strip off the package specifier
+"utf8::".  To to so, the code simply pretends "lookup_name" begins just
+after the "utf8::", and adjusts various other values to compensate.
+However, it missed the adjustment of one required one.
+
+This is only a problem when the property name begins with "perl" and
+isn't "perlspace" nor "perlword".  All such ones are undocumented
+internal properties.
+
+What happens in this case is that the input is reparsed with slightly
+different rules in effect as to what is legal versus illegal.  The
+problem is that "lookup_name" no longer is pointing to its initial
+value, but "name" is.  Thus the space allocated for filling "lookup_name"
+is now shorter than "name", and as this shortened "lookup_name" is
+filled by copying suitable portions of "name", the write can be to
+unallocated space.
+
+The solution is to skip the "utf8::" when reparsing "name".  Then both
+"lookup_name" and "name" are effectively shortened by the same amount,
+and there is no going off the end.
+
+This commit also does white-space adjustment so that things align
+vertically for readability.
+
+This can be easily backported to earlier Perl releases.
+---
+ regcomp.c           | 17 +++++++++++------
+ t/re/pat_advanced.t |  8 ++++++++
+ 2 files changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/regcomp.c b/regcomp.c
+index 9c6ccc2c1b..833f8644f7 100644
+--- a/regcomp.c
++++ b/regcomp.c
+@@ -23697,7 +23697,7 @@ S_parse_uniprop_string(pTHX_
+      * compile perl to know about them) */
+     bool is_nv_type = FALSE;
+ 
+-    unsigned int i, j = 0;
++    unsigned int i = 0, i_zero = 0, j = 0;
+     int equals_pos = -1;    /* Where the '=' is found, or negative if none */
+     int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
+     int table_index = 0;    /* The entry number for this property in the table
+@@ -23831,9 +23831,13 @@ S_parse_uniprop_string(pTHX_
+      * all of them are considered to be for that package.  For the purposes of
+      * parsing the rest of the property, strip it off */
+     if (non_pkg_begin == STRLENs("utf8::") && memBEGINPs(name, name_len, "utf8::")) {
+-        lookup_name +=  STRLENs("utf8::");
+-        j -=  STRLENs("utf8::");
+-        equals_pos -=  STRLENs("utf8::");
++        lookup_name += STRLENs("utf8::");
++        j           -= STRLENs("utf8::");
++        equals_pos  -= STRLENs("utf8::");
++        i_zero       = STRLENs("utf8::");   /* When resetting 'i' to reparse
++                                               from the beginning, it has to be
++                                               set past what we're stripping
++                                               off */
+         stripped_utf8_pkg = TRUE;
+     }
+ 
+@@ -24238,7 +24242,8 @@ S_parse_uniprop_string(pTHX_
+ 
+             /* We set the inputs back to 0 and the code below will reparse,
+              * using strict */
+-            i = j = 0;
++            i = i_zero;
++            j = 0;
+         }
+     }
+ 
+@@ -24259,7 +24264,7 @@ S_parse_uniprop_string(pTHX_
+          * separates two digits */
+         if (cur == '_') {
+             if (    stricter
+-                && (     i == 0 || (int) i == equals_pos || i == name_len- 1
++                && (   i == i_zero || (int) i == equals_pos || i == name_len- 1
+                     || ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
+             {
+                 lookup_name[j++] = '_';
+diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
+index 6152c7b85c..1db317fff9 100644
+--- a/t/re/pat_advanced.t
++++ b/t/re/pat_advanced.t
+@@ -2576,6 +2576,14 @@ EOF
+                        {}, "GH #17278");
+     }
+ 
++    {   # perl-security#140, read/write past buffer end
++        fresh_perl_like('qr/\p{utf8::perl x}/',
++                        qr/Illegal user-defined property name "utf8::perl x" in regex/,
++                        {}, "perl-security#140");
++        fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
++                        {}, "perl-security#140");
++    }
++
+ 
+     # !!! NOTE that tests that aren't at all likely to crash perl should go
+     # a ways above, above these last ones.  There's a comment there that, like
+-- 
+2.34.1
+
diff --git a/perl.spec b/perl.spec
index 39066b5..a201595 100644
--- a/perl.spec
+++ b/perl.spec
@@ -100,7 +100,7 @@ License:        GPL+ or Artistic
 Epoch:          %{perl_epoch}
 Version:        %{perl_version}
 # release number must be even higher, because dual-lived modules will be broken otherwise
-Release:        480%{?dist}
+Release:        481%{?dist}
 Summary:        Practical Extraction and Report Language
 Url:            https://www.perl.org/
 Source0:        https://www.cpan.org/src/5.0/perl-%{perl_version}.tar.xz
@@ -280,6 +280,10 @@ Patch57:        perl-5.32.1-Perl_do_sv_dump-handle-PL_strtab.patch
 # in upstream after 5.33.8
 Patch58:        perl-5.33.8-Fix-broken-left-shift-of-IV_MIN-under-use-integer.patch
 
+# Fix write past buffer end via illegal user-defined Unicode property
+# CVE-2023-47038
+Patch59:        perl-5.32.1-CVE-2023-47038.patch
+
 # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
 Patch200:       perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
 
@@ -4343,6 +4347,7 @@ you're not running VMS, this module does nothing.
 %patch56 -p1
 %patch57 -p1
 %patch58 -p1
+%patch59 -p1
 %patch200 -p1
 %patch201 -p1
 %patch202 -p1
@@ -4400,7 +4405,8 @@ perl -x patchlevel.h \
     'Fedora Patch55: Prevent the number of buckets in a hash from getting too large' \
     'Fedora Patch56: Fix a memory leak when compiling a regular expression (GH#18604)' \
     'Fedora Patch57: Fix dumping a hash entry of PL_strtab type' \
-    'Fedora Patch57: Fix an arithmetic left shift of a minimal integer value (GH#18639)' \
+    'Fedora Patch58: Fix an arithmetic left shift of a minimal integer value (GH#18639)' \
+    'RHEL Patch59: Fix write past buffer end via illegal user-defined Unicode property (CVE-2023-47038)' \
     'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
     'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
     'Fedora Patch202: Add definition of OPTIMIZE to .ph files (bug #2159759)' \
@@ -7176,6 +7182,9 @@ popd
 
 # Old changelog entries are preserved in CVS.
 %changelog
+* Mon Nov 27 2023 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.32.1-481
+- Fixes: CVE-2023-47038
+
 * Wed Jan 18 2023 Jitka Plesnikova <jplesnik@redhat.com> - 4:5.32.1-480
 - Add definition of OPTIMIZE to .ph files, if optimizing is used
   (bug#2159759)