glibc/SOURCES/glibc-rh1622678-2.patch

commit 0b79004569e5ce1669136b8c41564c3809730f15
Author: Florian Weimer <fweimer@redhat.com>
Date:   Tue Aug 28 12:57:46 2018 +0200

    regex: Add test tst-regcomp-truncated [BZ #23578]
    
    (cherry picked from commit 761404b74d9853ce1608195e24f25b78a910591a)

diff --git a/posix/Makefile b/posix/Makefile
index 00c62841a282f15a..83162123f9c927a0 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -96,7 +96,7 @@ tests		:= test-errno tstgetopt testfnm runtests runptests \
 		   tst-posix_fadvise tst-posix_fadvise64 \
 		   tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
 		   tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \
-		   bug-regex38
+		   bug-regex38 tst-regcomp-truncated
 tests-internal	:= bug-regex5 bug-regex20 bug-regex33 \
 		   tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \
 		   tst-glob_lstat_compat tst-spawn4-compat
@@ -194,6 +194,7 @@ $(objpfx)tst-regex2.out: $(gen-locales)
 $(objpfx)tst-regexloc.out: $(gen-locales)
 $(objpfx)tst-rxspencer.out: $(gen-locales)
 $(objpfx)tst-rxspencer-no-utf8.out: $(gen-locales)
+$(objpfx)tst-regcomp-truncated.out: $(gen-locales)
 endif
 
 # If we will use the generic uname implementation, we must figure out what
diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
new file mode 100644
index 0000000000000000..a4a1581bbc2b39eb
--- /dev/null
+++ b/posix/tst-regcomp-truncated.c
@@ -0,0 +1,191 @@
+/* Test compilation of truncated regular expressions.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This test constructs various patterns in an attempt to trigger
+   over-reading the regular expression compiler, such as bug
+   23578.  */
+
+#include <array_length.h>
+#include <errno.h>
+#include <locale.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/next_to_fault.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <wchar.h>
+
+/* Locales to test.  */
+static const char locales[][17] =
+  {
+    "C",
+    "en_US.UTF-8",
+    "de_DE.ISO-8859-1",
+  };
+
+/* Syntax options.  Will be combined with other flags.  */
+static const reg_syntax_t syntaxes[] =
+  {
+    RE_SYNTAX_EMACS,
+    RE_SYNTAX_AWK,
+    RE_SYNTAX_GNU_AWK,
+    RE_SYNTAX_POSIX_AWK,
+    RE_SYNTAX_GREP,
+    RE_SYNTAX_EGREP,
+    RE_SYNTAX_POSIX_EGREP,
+    RE_SYNTAX_POSIX_BASIC,
+    RE_SYNTAX_POSIX_EXTENDED,
+    RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
+  };
+
+/* Trailing characters placed after the initial character.  */
+static const char trailing_strings[][4] =
+  {
+    "",
+    "[",
+    "\\",
+    "[\\",
+    "(",
+    "(\\",
+    "\\(",
+  };
+
+static int
+do_test (void)
+{
+  /* Staging buffer for the constructed regular expression.  */
+  char buffer[16];
+
+  /* Allocation used to detect over-reading by the regular expression
+     compiler.  */
+  struct support_next_to_fault ntf
+    = support_next_to_fault_allocate (sizeof (buffer));
+
+  /* Arbitrary Unicode codepoint at which we stop generating
+     characters.  We do not probe the whole range because that would
+     take too long due to combinatorical exploision as the result of
+     combination with other flags.  */
+  static const wchar_t last_character = 0xfff;
+
+  for (size_t locale_idx = 0; locale_idx < array_length (locales);
+       ++ locale_idx)
+    {
+      if (setlocale (LC_ALL, locales[locale_idx]) == NULL)
+        {
+          support_record_failure ();
+          printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);
+          continue;
+        }
+      if (test_verbose > 0)
+        printf ("info: testing locale \"%s\"\n", locales[locale_idx]);
+
+      for (wchar_t wc = 0; wc <= last_character; ++wc)
+        {
+          char *after_wc;
+          if (wc == 0)
+            {
+              /* wcrtomb treats L'\0' in a special way.  */
+              *buffer = '\0';
+              after_wc = &buffer[1];
+            }
+          else
+            {
+              mbstate_t ps = { };
+              size_t ret = wcrtomb (buffer, wc, &ps);
+              if (ret == (size_t) -1)
+                {
+                  /* EILSEQ means that the target character set
+                     cannot encode the character.  */
+                  if (errno != EILSEQ)
+                    {
+                      support_record_failure ();
+                      printf ("error: wcrtomb (0x%x) failed: %m\n",
+                              (unsigned) wc);
+                    }
+                  continue;
+                }
+              TEST_VERIFY_EXIT (ret != 0);
+              after_wc = &buffer[ret];
+            }
+
+          for (size_t trailing_idx = 0;
+               trailing_idx < array_length (trailing_strings);
+               ++trailing_idx)
+            {
+              char *after_trailing
+                = stpcpy (after_wc, trailing_strings[trailing_idx]);
+
+              for (int do_nul = 0; do_nul < 2; ++do_nul)
+                {
+                  char *after_nul;
+                  if (do_nul)
+                    {
+                      *after_trailing = '\0';
+                      after_nul = &after_trailing[1];
+                    }
+                  else
+                    after_nul = after_trailing;
+
+                  size_t length = after_nul - buffer;
+
+                  /* Make sure that the faulting region starts
+                     after the used portion of the buffer.  */
+                  char *ntf_start = ntf.buffer + sizeof (buffer) - length;
+                  memcpy (ntf_start, buffer, length);
+
+                  for (const reg_syntax_t *psyntax = syntaxes;
+                       psyntax < array_end (syntaxes); ++psyntax)
+                    for (int do_icase = 0; do_icase < 2; ++do_icase)
+                      {
+                        re_syntax_options = *psyntax;
+                        if (do_icase)
+                          re_syntax_options |= RE_ICASE;
+
+                        regex_t reg;
+                        memset (&reg, 0, sizeof (reg));
+                        const char *msg = re_compile_pattern
+                          (ntf_start, length, &reg);
+                        if (msg != NULL)
+                          {
+                            if (test_verbose > 0)
+                              {
+                                char *quoted = support_quote_blob
+                                  (buffer, length);
+                                printf ("info: compilation failed for pattern"
+                                        " \"%s\", syntax 0x%lx: %s\n",
+                                        quoted, re_syntax_options, msg);
+                                free (quoted);
+                              }
+                          }
+                        else
+                          regfree (&reg);
+                      }
+                }
+            }
+        }
+    }
+
+  support_next_to_fault_free (&ntf);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
import glibc-2.28-158.el8 2021-04-23 04:11:25 +00:00			`commit 0b79004569e5ce1669136b8c41564c3809730f15`
			`Author: Florian Weimer <fweimer@redhat.com>`
			`Date: Tue Aug 28 12:57:46 2018 +0200`

			`regex: Add test tst-regcomp-truncated [BZ #23578]`

			`(cherry picked from commit 761404b74d9853ce1608195e24f25b78a910591a)`

			`diff --git a/posix/Makefile b/posix/Makefile`
			`index 00c62841a282f15a..83162123f9c927a0 100644`
			`--- a/posix/Makefile`
			`+++ b/posix/Makefile`
			`@@ -96,7 +96,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \`
			`tst-posix_fadvise tst-posix_fadvise64 \`
			`tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \`
			`tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \`
			`- bug-regex38`
			`+ bug-regex38 tst-regcomp-truncated`
			`tests-internal := bug-regex5 bug-regex20 bug-regex33 \`
			`tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \`
			`tst-glob_lstat_compat tst-spawn4-compat`
			`@@ -194,6 +194,7 @@ $(objpfx)tst-regex2.out: $(gen-locales)`
			`$(objpfx)tst-regexloc.out: $(gen-locales)`
			`$(objpfx)tst-rxspencer.out: $(gen-locales)`
			`$(objpfx)tst-rxspencer-no-utf8.out: $(gen-locales)`
			`+$(objpfx)tst-regcomp-truncated.out: $(gen-locales)`
			`endif`

			`# If we will use the generic uname implementation, we must figure out what`
			`diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c`
			`new file mode 100644`
			`index 0000000000000000..a4a1581bbc2b39eb`
			`--- /dev/null`
			`+++ b/posix/tst-regcomp-truncated.c`
			`@@ -0,0 +1,191 @@`
			`+/* Test compilation of truncated regular expressions.`
			`+ Copyright (C) 2018 Free Software Foundation, Inc.`
			`+ This file is part of the GNU C Library.`
			`+`
			`+ The GNU C Library is free software; you can redistribute it and/or`
			`+ modify it under the terms of the GNU Lesser General Public`
			`+ License as published by the Free Software Foundation; either`
			`+ version 2.1 of the License, or (at your option) any later version.`
			`+`
			`+ The GNU C Library is distributed in the hope that it will be useful,`
			`+ but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`+ Lesser General Public License for more details.`
			`+`
			`+ You should have received a copy of the GNU Lesser General Public`
			`+ License along with the GNU C Library; if not, see`
			`+ <http://www.gnu.org/licenses/>. */`
			`+`
			`+/* This test constructs various patterns in an attempt to trigger`
			`+ over-reading the regular expression compiler, such as bug`
			`+ 23578. */`
			`+`
			`+#include <array_length.h>`
			`+#include <errno.h>`
			`+#include <locale.h>`
			`+#include <regex.h>`
			`+#include <stdio.h>`
			`+#include <stdlib.h>`
			`+#include <string.h>`
			`+#include <support/check.h>`
			`+#include <support/next_to_fault.h>`
			`+#include <support/support.h>`
			`+#include <support/test-driver.h>`
			`+#include <wchar.h>`
			`+`
			`+/* Locales to test. */`
			`+static const char locales[][17] =`
			`+ {`
			`+ "C",`
			`+ "en_US.UTF-8",`
			`+ "de_DE.ISO-8859-1",`
			`+ };`
			`+`
			`+/* Syntax options. Will be combined with other flags. */`
			`+static const reg_syntax_t syntaxes[] =`
			`+ {`
			`+ RE_SYNTAX_EMACS,`
			`+ RE_SYNTAX_AWK,`
			`+ RE_SYNTAX_GNU_AWK,`
			`+ RE_SYNTAX_POSIX_AWK,`
			`+ RE_SYNTAX_GREP,`
			`+ RE_SYNTAX_EGREP,`
			`+ RE_SYNTAX_POSIX_EGREP,`
			`+ RE_SYNTAX_POSIX_BASIC,`
			`+ RE_SYNTAX_POSIX_EXTENDED,`
			`+ RE_SYNTAX_POSIX_MINIMAL_EXTENDED,`
			`+ };`
			`+`
			`+/* Trailing characters placed after the initial character. */`
			`+static const char trailing_strings[][4] =`
			`+ {`
			`+ "",`
			`+ "[",`
			`+ "\\",`
			`+ "[\\",`
			`+ "(",`
			`+ "(\\",`
			`+ "\\(",`
			`+ };`
			`+`
			`+static int`
			`+do_test (void)`
			`+{`
			`+ /* Staging buffer for the constructed regular expression. */`
			`+ char buffer[16];`
			`+`
			`+ /* Allocation used to detect over-reading by the regular expression`
			`+ compiler. */`
			`+ struct support_next_to_fault ntf`
			`+ = support_next_to_fault_allocate (sizeof (buffer));`
			`+`
			`+ /* Arbitrary Unicode codepoint at which we stop generating`
			`+ characters. We do not probe the whole range because that would`
			`+ take too long due to combinatorical exploision as the result of`
			`+ combination with other flags. */`
			`+ static const wchar_t last_character = 0xfff;`
			`+`
			`+ for (size_t locale_idx = 0; locale_idx < array_length (locales);`
			`+ ++ locale_idx)`
			`+ {`
			`+ if (setlocale (LC_ALL, locales[locale_idx]) == NULL)`
			`+ {`
			`+ support_record_failure ();`
			`+ printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);`
			`+ continue;`
			`+ }`
			`+ if (test_verbose > 0)`
			`+ printf ("info: testing locale \"%s\"\n", locales[locale_idx]);`
			`+`
			`+ for (wchar_t wc = 0; wc <= last_character; ++wc)`
			`+ {`
			`+ char *after_wc;`
			`+ if (wc == 0)`
			`+ {`
			`+ /* wcrtomb treats L'\0' in a special way. */`
			`+ *buffer = '\0';`
			`+ after_wc = &buffer[1];`
			`+ }`
			`+ else`
			`+ {`
			`+ mbstate_t ps = { };`
			`+ size_t ret = wcrtomb (buffer, wc, &ps);`
			`+ if (ret == (size_t) -1)`
			`+ {`
			`+ /* EILSEQ means that the target character set`
			`+ cannot encode the character. */`
			`+ if (errno != EILSEQ)`
			`+ {`
			`+ support_record_failure ();`
			`+ printf ("error: wcrtomb (0x%x) failed: %m\n",`
			`+ (unsigned) wc);`
			`+ }`
			`+ continue;`
			`+ }`
			`+ TEST_VERIFY_EXIT (ret != 0);`
			`+ after_wc = &buffer[ret];`
			`+ }`
			`+`
			`+ for (size_t trailing_idx = 0;`
			`+ trailing_idx < array_length (trailing_strings);`
			`+ ++trailing_idx)`
			`+ {`
			`+ char *after_trailing`
			`+ = stpcpy (after_wc, trailing_strings[trailing_idx]);`
			`+`
			`+ for (int do_nul = 0; do_nul < 2; ++do_nul)`
			`+ {`
			`+ char *after_nul;`
			`+ if (do_nul)`
			`+ {`
			`+ *after_trailing = '\0';`
			`+ after_nul = &after_trailing[1];`
			`+ }`
			`+ else`
			`+ after_nul = after_trailing;`
			`+`
			`+ size_t length = after_nul - buffer;`
			`+`
			`+ /* Make sure that the faulting region starts`
			`+ after the used portion of the buffer. */`
			`+ char *ntf_start = ntf.buffer + sizeof (buffer) - length;`
			`+ memcpy (ntf_start, buffer, length);`
			`+`
			`+ for (const reg_syntax_t *psyntax = syntaxes;`
			`+ psyntax < array_end (syntaxes); ++psyntax)`
			`+ for (int do_icase = 0; do_icase < 2; ++do_icase)`
			`+ {`
			`+ re_syntax_options = *psyntax;`
			`+ if (do_icase)`
			`+ re_syntax_options \|= RE_ICASE;`
			`+`
			`+ regex_t reg;`
			`+ memset (&reg, 0, sizeof (reg));`
			`+ const char *msg = re_compile_pattern`
			`+ (ntf_start, length, &reg);`
			`+ if (msg != NULL)`
			`+ {`
			`+ if (test_verbose > 0)`
			`+ {`
			`+ char *quoted = support_quote_blob`
			`+ (buffer, length);`
			`+ printf ("info: compilation failed for pattern"`
			`+ " \"%s\", syntax 0x%lx: %s\n",`
			`+ quoted, re_syntax_options, msg);`
			`+ free (quoted);`
			`+ }`
			`+ }`
			`+ else`
			`+ regfree (&reg);`
			`+ }`
			`+ }`
			`+ }`
			`+ }`
			`+ }`
			`+`
			`+ support_next_to_fault_free (&ntf);`
			`+`
			`+ return 0;`
			`+}`
			`+`
			`+#include <support/test-driver.c>`