From 7e7241f6206d2f68286aa56853db7594e749c4f6 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Thu, 23 Jan 2025 09:08:39 +0100
Subject: [PATCH] Sync with upstream branch release/2.39/master (CVE-2025-0395)

Upstream commit: 808a84a8b81468b517a4d721fdc62069cb8c211f

- Fix underallocation of abort_msg_s struct (CVE-2025-0395)
- x86/string: Fixup alignment of main loop in str{n}cmp-evex [BZ #32212]
- x86: Improve large memset perf with non-temporal stores [RHEL-29312]
- x86: Avoid integer truncation with large cache sizes (bug 32470)
- math: Exclude internal math symbols for tests [BZ #32414]
- malloc: add indirection for malloc(-like) functions in tests [BZ #32366]
- Pass -nostdlib -nostartfiles together with -r [BZ #31753]
- nptl: initialize cpu_id_start prior to rseq registration
- nptl: initialize rseq area prior to registration
---
 glibc-upstream-2.39-138.patch |  55 ++++++++
 glibc-upstream-2.39-139.patch |  29 ++++
 glibc-upstream-2.39-140.patch |  28 ++++
 glibc-upstream-2.39-141.patch | 172 +++++++++++++++++++++++
 glibc-upstream-2.39-142.patch |  44 ++++++
 glibc-upstream-2.39-143.patch |  30 ++++
 glibc-upstream-2.39-144.patch | 250 ++++++++++++++++++++++++++++++++++
 glibc-upstream-2.39-145.patch | 143 +++++++++++++++++++
 glibc-upstream-2.39-146.patch |  57 ++++++++
 glibc.spec                    |  24 +++-
 10 files changed, 831 insertions(+), 1 deletion(-)
 create mode 100644 glibc-upstream-2.39-138.patch
 create mode 100644 glibc-upstream-2.39-139.patch
 create mode 100644 glibc-upstream-2.39-140.patch
 create mode 100644 glibc-upstream-2.39-141.patch
 create mode 100644 glibc-upstream-2.39-142.patch
 create mode 100644 glibc-upstream-2.39-143.patch
 create mode 100644 glibc-upstream-2.39-144.patch
 create mode 100644 glibc-upstream-2.39-145.patch
 create mode 100644 glibc-upstream-2.39-146.patch

diff --git a/glibc-upstream-2.39-138.patch b/glibc-upstream-2.39-138.patch
new file mode 100644
index 0000000..c2ed486
--- /dev/null
+++ b/glibc-upstream-2.39-138.patch
@@ -0,0 +1,55 @@
+commit 9a0e174a39a3a65f628c6a55e29fe35f6d67bf42
+Author: Michael Jeanson <mjeanson@efficios.com>
+Date:   Thu Nov 7 22:23:49 2024 +0100
+
+    nptl: initialize rseq area prior to registration
+    
+    Per the rseq syscall documentation, 3 fields are required to be
+    initialized by userspace prior to registration, they are 'cpu_id',
+    'rseq_cs' and 'flags'. Since we have no guarantee that 'struct pthread'
+    is cleared on all architectures, explicitly set those 3 fields prior to
+    registration.
+    
+    Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
+    Reviewed-by: Florian Weimer <fweimer@redhat.com>
+    (cherry picked from commit 97f60abd25628425971f07e9b0e7f8eec0741235)
+
+diff --git a/nptl/descr.h b/nptl/descr.h
+index 4697f633e16c7359..a83df327e4bcba2e 100644
+--- a/nptl/descr.h
++++ b/nptl/descr.h
+@@ -417,6 +417,8 @@ struct pthread
+     {
+       uint32_t cpu_id_start;
+       uint32_t cpu_id;
++      uint64_t rseq_cs;
++      uint32_t flags;
+     };
+     char pad[32];		/* Original rseq area size.  */
+   } rseq_area __attribute__ ((aligned (32)));
+diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
+index 7ea935b4adab8c20..37a8f630b6519ff0 100644
+--- a/sysdeps/unix/sysv/linux/rseq-internal.h
++++ b/sysdeps/unix/sysv/linux/rseq-internal.h
+@@ -51,11 +51,21 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq)
+         /* The initial implementation used only 20 bytes out of 32,
+            but still expected size 32.  */
+         size = RSEQ_AREA_SIZE_INITIAL;
++
++      /* Initialize the rseq fields that are read by the kernel on
++         registration, there is no guarantee that struct pthread is
++         cleared on all architectures.  */
++      THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED);
++      THREAD_SETMEM (self, rseq_area.rseq_cs, 0);
++      THREAD_SETMEM (self, rseq_area.flags, 0);
++
+       int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
+                                        size, 0, RSEQ_SIG);
+       if (!INTERNAL_SYSCALL_ERROR_P (ret))
+         return true;
+     }
++  /* When rseq is disabled by tunables or the registration fails, inform
++     userspace by setting 'cpu_id' to RSEQ_CPU_ID_REGISTRATION_FAILED.  */
+   THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+   return false;
+ }
diff --git a/glibc-upstream-2.39-139.patch b/glibc-upstream-2.39-139.patch
new file mode 100644
index 0000000..20d6367
--- /dev/null
+++ b/glibc-upstream-2.39-139.patch
@@ -0,0 +1,29 @@
+commit 350db2839387659e1500a54d276e401c9c6b2dee
+Author: Michael Jeanson <mjeanson@efficios.com>
+Date:   Wed Nov 20 14:15:42 2024 -0500
+
+    nptl: initialize cpu_id_start prior to rseq registration
+    
+    When adding explicit initialization of rseq fields prior to
+    registration, I glossed over the fact that 'cpu_id_start' is also
+    documented as initialized by user-space.
+    
+    While current kernels don't validate the content of this field on
+    registration, future ones could.
+    
+    Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
+    Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+    (cherry picked from commit d9f40387d3305d97e30a8cf8724218c42a63680a)
+
+diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
+index 37a8f630b6519ff0..ef3eab1fefd4d90d 100644
+--- a/sysdeps/unix/sysv/linux/rseq-internal.h
++++ b/sysdeps/unix/sysv/linux/rseq-internal.h
+@@ -56,6 +56,7 @@ rseq_register_current_thread (struct pthread *self, bool do_rseq)
+          registration, there is no guarantee that struct pthread is
+          cleared on all architectures.  */
+       THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED);
++      THREAD_SETMEM (self, rseq_area.cpu_id_start, 0);
+       THREAD_SETMEM (self, rseq_area.rseq_cs, 0);
+       THREAD_SETMEM (self, rseq_area.flags, 0);
+ 
diff --git a/glibc-upstream-2.39-140.patch b/glibc-upstream-2.39-140.patch
new file mode 100644
index 0000000..aa67ae2
--- /dev/null
+++ b/glibc-upstream-2.39-140.patch
@@ -0,0 +1,28 @@
+commit aa8768999e94fcee1695feb766c69dd8a93b706b
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Fri May 17 20:00:38 2024 -0700
+
+    Pass -nostdlib -nostartfiles together with -r [BZ #31753]
+    
+    Since -r in GCC 6/7/8 doesn't imply -nostdlib -nostartfiles, update the
+    link-static-libc.out rule to also pass -nostdlib -nostartfiles.  This
+    fixes BZ #31753.
+    
+    Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Florian Weimer <fweimer@redhat.com>
+    (cherry picked from commit 2be3352f0b1ebaa39596393fffe1062275186669)
+
+diff --git a/Makefile b/Makefile
+index 37bf70aa4ad4403f..ae9bc09327dd2d5b 100644
+--- a/Makefile
++++ b/Makefile
+@@ -581,7 +581,8 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh
+ # definitions of any symbols.
+ tests-special += $(objpfx)link-static-libc.out
+ $(objpfx)link-static-libc.out:
+-	$(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
++	$(LINK.o) $(whole-archive) -nostdlib -nostartfiles -r \
++	  $(objpfx)libc.a -o /dev/null > $@ 2>&1; \
+ 	$(evaluate-test)
+ 
+ # Print test summary for tests in $1 .sum file;
diff --git a/glibc-upstream-2.39-141.patch b/glibc-upstream-2.39-141.patch
new file mode 100644
index 0000000..2801ac0
--- /dev/null
+++ b/glibc-upstream-2.39-141.patch
@@ -0,0 +1,172 @@
+commit 51da74a97e0f024fd89b57304b3ab010a3cfaef1
+Author: Sam James <sam@gentoo.org>
+Date:   Mon Dec 9 23:11:25 2024 +0000
+
+    malloc: add indirection for malloc(-like) functions in tests [BZ #32366]
+    
+    GCC 15 introduces allocation dead code removal (DCE) for PR117370 in
+    r15-5255-g7828dc070510f8. This breaks various glibc tests which want
+    to assert various properties of the allocator without doing anything
+    obviously useful with the allocated memory.
+    
+    Alexander Monakov rightly pointed out that we can and should do better
+    than passing -fno-malloc-dce to paper over the problem. Not least because
+    GCC 14 already does such DCE where there's no testing of malloc's return
+    value against NULL, and LLVM has such optimisations too.
+    
+    Handle this by providing malloc (and friends) wrappers with a volatile
+    function pointer to obscure that we're calling malloc (et al.) from the
+    compiler.
+    
+    Reviewed-by: Paul Eggert <eggert@cs.ucla.edu>
+    (cherry picked from commit a9944a52c967ce76a5894c30d0274b824df43c7a)
+
+diff --git a/malloc/tst-aligned-alloc.c b/malloc/tst-aligned-alloc.c
+index 91167d1392c0e626..b0f05a8fec78d5e8 100644
+--- a/malloc/tst-aligned-alloc.c
++++ b/malloc/tst-aligned-alloc.c
+@@ -25,6 +25,8 @@
+ #include <libc-diag.h>
+ #include <support/check.h>
+ 
++#include "tst-malloc-aux.h"
++
+ static int
+ do_test (void)
+ {
+diff --git a/malloc/tst-compathooks-off.c b/malloc/tst-compathooks-off.c
+index d0106f3fb74ff3b1..4cce6e5a8076f6b6 100644
+--- a/malloc/tst-compathooks-off.c
++++ b/malloc/tst-compathooks-off.c
+@@ -25,6 +25,8 @@
+ #include <support/check.h>
+ #include <support/support.h>
+ 
++#include "tst-malloc-aux.h"
++
+ extern void (*volatile __free_hook) (void *, const void *);
+ extern void *(*volatile __malloc_hook)(size_t, const void *);
+ extern void *(*volatile __realloc_hook)(void *, size_t, const void *);
+diff --git a/malloc/tst-malloc-aux.h b/malloc/tst-malloc-aux.h
+new file mode 100644
+index 0000000000000000..54908b4a2464d510
+--- /dev/null
++++ b/malloc/tst-malloc-aux.h
+@@ -0,0 +1,41 @@
++/* Wrappers for malloc-like functions to allow testing the implementation
++   without optimization.
++   Copyright (C) 2024 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public License as
++   published by the Free Software Foundation; either version 2.1 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, see <https://www.gnu.org/licenses/>.  */
++
++#ifndef TST_MALLOC_AUX_H
++#define TST_MALLOC_AUX_H
++
++#include <stddef.h>
++#include <stdlib.h>
++
++static void *(*volatile aligned_alloc_indirect)(size_t, size_t) = aligned_alloc;
++static void *(*volatile calloc_indirect)(size_t, size_t) = calloc;
++static void *(*volatile malloc_indirect)(size_t) = malloc;
++static void *(*volatile realloc_indirect)(void*, size_t) = realloc;
++
++#undef aligned_alloc
++#undef calloc
++#undef malloc
++#undef realloc
++
++#define aligned_alloc aligned_alloc_indirect
++#define calloc calloc_indirect
++#define malloc malloc_indirect
++#define realloc realloc_indirect
++
++#endif /* TST_MALLOC_AUX_H */
+diff --git a/malloc/tst-malloc-check.c b/malloc/tst-malloc-check.c
+index fde8863ad7561a71..cc88bff3b39a421c 100644
+--- a/malloc/tst-malloc-check.c
++++ b/malloc/tst-malloc-check.c
+@@ -20,6 +20,8 @@
+ #include <stdlib.h>
+ #include <libc-diag.h>
+ 
++#include "tst-malloc-aux.h"
++
+ static int errors = 0;
+ 
+ static void
+diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c
+index 8e9e0d5fa2b4b907..2b91377e54cdc485 100644
+--- a/malloc/tst-malloc-too-large.c
++++ b/malloc/tst-malloc-too-large.c
+@@ -43,6 +43,7 @@
+ #include <unistd.h>
+ #include <sys/param.h>
+ 
++#include "tst-malloc-aux.h"
+ 
+ /* This function prepares for each 'too-large memory allocation' test by
+    performing a small successful malloc/free and resetting errno prior to
+diff --git a/malloc/tst-malloc.c b/malloc/tst-malloc.c
+index f7a6e4654c374d01..68af399022543111 100644
+--- a/malloc/tst-malloc.c
++++ b/malloc/tst-malloc.c
+@@ -22,6 +22,8 @@
+ #include <libc-diag.h>
+ #include <time.h>
+ 
++#include "tst-malloc-aux.h"
++
+ static int errors = 0;
+ 
+ static void
+diff --git a/malloc/tst-realloc.c b/malloc/tst-realloc.c
+index f50499ecb114d574..74a28fb45ed80bf5 100644
+--- a/malloc/tst-realloc.c
++++ b/malloc/tst-realloc.c
+@@ -23,6 +23,8 @@
+ #include <libc-diag.h>
+ #include <support/check.h>
+ 
++#include "tst-malloc-aux.h"
++
+ static int
+ do_test (void)
+ {
+diff --git a/support/support.h b/support/support.h
+index ba21ec9b5add7c02..1a77f7979330d60c 100644
+--- a/support/support.h
++++ b/support/support.h
+@@ -113,7 +113,7 @@ void *xposix_memalign (size_t alignment, size_t n)
+   __attribute_malloc__ __attribute_alloc_align__ ((1))
+   __attribute_alloc_size__ ((2)) __attr_dealloc_free __returns_nonnull;
+ char *xasprintf (const char *format, ...)
+-  __attribute__ ((format (printf, 1, 2), malloc)) __attr_dealloc_free
++  __attribute__ ((format (printf, 1, 2), __malloc__)) __attr_dealloc_free
+   __returns_nonnull;
+ char *xstrdup (const char *) __attr_dealloc_free __returns_nonnull;
+ char *xstrndup (const char *, size_t) __attr_dealloc_free __returns_nonnull;
+diff --git a/test-skeleton.c b/test-skeleton.c
+index ae185a4f2821de00..690f26e7cf229622 100644
+--- a/test-skeleton.c
++++ b/test-skeleton.c
+@@ -27,7 +27,6 @@
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <getopt.h>
+-#include <malloc.h>
+ #include <paths.h>
+ #include <search.h>
+ #include <signal.h>
diff --git a/glibc-upstream-2.39-142.patch b/glibc-upstream-2.39-142.patch
new file mode 100644
index 0000000..72c8751
--- /dev/null
+++ b/glibc-upstream-2.39-142.patch
@@ -0,0 +1,44 @@
+commit 2c882bf9c15d206aaf04766d1b8e3ae5b1002cc2
+Author: H.J. Lu <hjl.tools@gmail.com>
+Date:   Thu Dec 5 08:39:44 2024 +0800
+
+    math: Exclude internal math symbols for tests [BZ #32414]
+    
+    Since internal tests don't have access to internal symbols in libm,
+    exclude them for internal tests.  Also make tst-strtod5 and tst-strtod5i
+    depend on $(libm) to support older versions of GCC which can't inline
+    copysign family functions.  This fixes BZ #32414.
+    
+    Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
+    Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
+    (cherry picked from commit 5df09b444835fca6e64b3d4b4a5beb19b3b2ba21)
+
+diff --git a/include/math.h b/include/math.h
+index fa11a710a6c152a4..035fd160ffb9e032 100644
+--- a/include/math.h
++++ b/include/math.h
+@@ -130,7 +130,10 @@ fabsf128 (_Float128 x)
+ }
+ # endif
+ 
+-# if !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0)
++
++/* NB: Internal tests don't have access to internal symbols.  */
++# if !IS_IN (testsuite_internal) \
++     && !(defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0)
+ #  ifndef NO_MATH_REDIRECT
+ /* Declare some functions for use within GLIBC.  Compilers typically
+    inline those functions as a single instruction.  Use an asm to
+diff --git a/stdlib/Makefile b/stdlib/Makefile
+index 70d7291c6e3454a8..ff1418f5bb2ea5c9 100644
+--- a/stdlib/Makefile
++++ b/stdlib/Makefile
+@@ -607,6 +607,8 @@ $(objpfx)bug-strtod2: $(libm)
+ $(objpfx)tst-strtod-round: $(libm)
+ $(objpfx)tst-tininess: $(libm)
+ $(objpfx)tst-strtod-underflow: $(libm)
++$(objpfx)tst-strtod5: $(libm)
++$(objpfx)tst-strtod5i: $(libm)
+ $(objpfx)tst-strtod6: $(libm)
+ $(objpfx)tst-strtod-nan-locale: $(libm)
+ $(objpfx)tst-strtod-nan-sign: $(libm)
diff --git a/glibc-upstream-2.39-143.patch b/glibc-upstream-2.39-143.patch
new file mode 100644
index 0000000..ade64c9
--- /dev/null
+++ b/glibc-upstream-2.39-143.patch
@@ -0,0 +1,30 @@
+commit 2c8a7f14fac3628b6a06cc76cdfda54a7ac20386
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Tue Dec 17 18:12:03 2024 +0100
+
+    x86: Avoid integer truncation with large cache sizes (bug 32470)
+    
+    Some hypervisors report 1 TiB L3 cache size.  This results
+    in some variables incorrectly getting zeroed, causing crashes
+    in memcpy/memmove because invariants are violated.
+    
+    (cherry picked from commit 61c3450db96dce96ad2b24b4f0b548e6a46d68e5)
+
+diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
+index 5a98f70364220da4..1f68968a9a457586 100644
+--- a/sysdeps/x86/dl-cacheinfo.h
++++ b/sysdeps/x86/dl-cacheinfo.h
+@@ -959,11 +959,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+     non_temporal_threshold = maximum_non_temporal_threshold;
+ 
+   /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8.  */
+-  unsigned int minimum_rep_movsb_threshold;
++  unsigned long int minimum_rep_movsb_threshold;
+   /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for
+      VEC_SIZE == 64 or 32.  For VEC_SIZE == 16, the default REP MOVSB
+      threshold is 2048 * (VEC_SIZE / 16).  */
+-  unsigned int rep_movsb_threshold;
++  unsigned long int rep_movsb_threshold;
+   if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+       && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512))
+     {
diff --git a/glibc-upstream-2.39-144.patch b/glibc-upstream-2.39-144.patch
new file mode 100644
index 0000000..17ca3b3
--- /dev/null
+++ b/glibc-upstream-2.39-144.patch
@@ -0,0 +1,250 @@
+commit 61daaa76390e0ff73eade3a688d3626b7e7e0c20
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Fri May 24 12:38:50 2024 -0500
+
+    x86: Improve large memset perf with non-temporal stores [RHEL-29312]
+    
+    Previously we use `rep stosb` for all medium/large memsets. This is
+    notably worse than non-temporal stores for large (above a
+    few MBs) memsets.
+    See:
+    https://docs.google.com/spreadsheets/d/1opzukzvum4n6-RUVHTGddV6RjAEil4P2uMjjQGLbLcU/edit?usp=sharing
+    For data using different strategies for large memset on ICX and SKX.
+    
+    Using non-temporal stores can be up to 3x faster on ICX and 2x faster
+    on SKX. Historically, these numbers would not have been so good
+    because of the zero-over-zero writeback optimization that `rep stosb`
+    is able to do. But, the zero-over-zero writeback optimization has been
+    removed as a potential side-channel attack, so there is no longer any
+    good reason to only rely on `rep stosb` for large memsets. On the flip
+    side, non-temporal writes can avoid data in their RFO requests saving
+    memory bandwidth.
+    
+    All of the other changes to the file are to re-organize the
+    code-blocks to maintain "good" alignment given the new code added in
+    the `L(stosb_local)` case.
+    
+    The results from running the GLIBC memset benchmarks on TGL-client for
+    N=20 runs:
+    
+    Geometric Mean across the suite New / Old EVEX256: 0.979
+    Geometric Mean across the suite New / Old EVEX512: 0.979
+    Geometric Mean across the suite New / Old AVX2   : 0.986
+    Geometric Mean across the suite New / Old SSE2   : 0.979
+    
+    Most of the cases are essentially unchanged, this is mostly to show
+    that adding the non-temporal case didn't add any regressions to the
+    other cases.
+    
+    The results on the memset-large benchmark suite on TGL-client for N=20
+    runs:
+    
+    Geometric Mean across the suite New / Old EVEX256: 0.926
+    Geometric Mean across the suite New / Old EVEX512: 0.925
+    Geometric Mean across the suite New / Old AVX2   : 0.928
+    Geometric Mean across the suite New / Old SSE2   : 0.924
+    
+    So roughly a 7.5% speedup. This is lower than what we see on servers
+    (likely because clients typically have faster single-core bandwidth so
+    saving bandwidth on RFOs is less impactful), but still advantageous.
+    
+    Full test-suite passes on x86_64 w/ and w/o multiarch.
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    
+    (cherry picked from commit 5bf0ab80573d66e4ae5d94b094659094336da90f)
+
+diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+index 97839a22483b0613..637caadb406b2544 100644
+--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+@@ -21,10 +21,13 @@
+    2. If size is less than VEC, use integer register stores.
+    3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
+    4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
+-   5. On machines ERMS feature, if size is greater or equal than
+-      __x86_rep_stosb_threshold then REP STOSB will be used.
+-   6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+-      4 VEC stores and store 4 * VEC at a time until done.  */
++   5. If size is more to 4 * VEC_SIZE, align to 1 * VEC_SIZE with
++      4 VEC stores and store 4 * VEC at a time until done.
++   6. On machines ERMS feature, if size is range
++	  [__x86_rep_stosb_threshold, __x86_shared_non_temporal_threshold)
++	  then REP STOSB will be used.
++   7. If size >= __x86_shared_non_temporal_threshold, use a
++	  non-temporal stores.  */
+ 
+ #include <sysdep.h>
+ 
+@@ -147,6 +150,41 @@ L(entry_from_wmemset):
+ 	VMOVU	%VMM(0), -VEC_SIZE(%rdi,%rdx)
+ 	VMOVU	%VMM(0), (%rdi)
+ 	VZEROUPPER_RETURN
++
++	/* If have AVX512 mask instructions put L(less_vec) close to
++	   entry as it doesn't take much space and is likely a hot target.  */
++#ifdef USE_LESS_VEC_MASK_STORE
++    /* Align to ensure the L(less_vec) logic all fits in 1x cache lines.  */
++	.p2align 6,, 47
++	.p2align 4
++L(less_vec):
++L(less_vec_from_wmemset):
++	/* Less than 1 VEC.  */
++# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
++#  error Unsupported VEC_SIZE!
++# endif
++	/* Clear high bits from edi. Only keeping bits relevant to page
++	   cross check. Note that we are using rax which is set in
++	   MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out.  */
++	andl	$(PAGE_SIZE - 1), %edi
++	/* Check if VEC_SIZE store cross page. Mask stores suffer
++	   serious performance degradation when it has to fault suppress.  */
++	cmpl	$(PAGE_SIZE - VEC_SIZE), %edi
++	/* This is generally considered a cold target.  */
++	ja	L(cross_page)
++# if VEC_SIZE > 32
++	movq	$-1, %rcx
++	bzhiq	%rdx, %rcx, %rcx
++	kmovq	%rcx, %k1
++# else
++	movl	$-1, %ecx
++	bzhil	%edx, %ecx, %ecx
++	kmovd	%ecx, %k1
++# endif
++	vmovdqu8 %VMM(0), (%rax){%k1}
++	VZEROUPPER_RETURN
++#endif
++
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ END (MEMSET_SYMBOL (__memset, unaligned))
+ 
+@@ -185,54 +223,6 @@ L(last_2x_vec):
+ #endif
+ 	VZEROUPPER_RETURN
+ 
+-	/* If have AVX512 mask instructions put L(less_vec) close to
+-	   entry as it doesn't take much space and is likely a hot target.
+-	 */
+-#ifdef USE_LESS_VEC_MASK_STORE
+-	.p2align 4,, 10
+-L(less_vec):
+-L(less_vec_from_wmemset):
+-	/* Less than 1 VEC.  */
+-# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+-#  error Unsupported VEC_SIZE!
+-# endif
+-	/* Clear high bits from edi. Only keeping bits relevant to page
+-	   cross check. Note that we are using rax which is set in
+-	   MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out.  */
+-	andl	$(PAGE_SIZE - 1), %edi
+-	/* Check if VEC_SIZE store cross page. Mask stores suffer
+-	   serious performance degradation when it has to fault suppress.
+-	 */
+-	cmpl	$(PAGE_SIZE - VEC_SIZE), %edi
+-	/* This is generally considered a cold target.  */
+-	ja	L(cross_page)
+-# if VEC_SIZE > 32
+-	movq	$-1, %rcx
+-	bzhiq	%rdx, %rcx, %rcx
+-	kmovq	%rcx, %k1
+-# else
+-	movl	$-1, %ecx
+-	bzhil	%edx, %ecx, %ecx
+-	kmovd	%ecx, %k1
+-# endif
+-	vmovdqu8 %VMM(0), (%rax){%k1}
+-	VZEROUPPER_RETURN
+-
+-# if defined USE_MULTIARCH && IS_IN (libc)
+-	/* Include L(stosb_local) here if including L(less_vec) between
+-	   L(stosb_more_2x_vec) and ENTRY. This is to cache align the
+-	   L(stosb_more_2x_vec) target.  */
+-	.p2align 4,, 10
+-L(stosb_local):
+-	movzbl	%sil, %eax
+-	mov	%RDX_LP, %RCX_LP
+-	mov	%RDI_LP, %RDX_LP
+-	rep	stosb
+-	mov	%RDX_LP, %RAX_LP
+-	VZEROUPPER_RETURN
+-# endif
+-#endif
+-
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ 	.p2align 4
+ L(stosb_more_2x_vec):
+@@ -318,21 +308,33 @@ L(return_vzeroupper):
+ 	ret
+ #endif
+ 
+-	.p2align 4,, 10
+-#ifndef USE_LESS_VEC_MASK_STORE
+-# if defined USE_MULTIARCH && IS_IN (libc)
++#ifdef USE_WITH_AVX2
++	.p2align 4
++#else
++	.p2align 4,, 4
++#endif
++
++#if defined USE_MULTIARCH && IS_IN (libc)
+ 	/* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in
+ 	   range for 2-byte jump encoding.  */
+ L(stosb_local):
++	cmp	__x86_shared_non_temporal_threshold(%rip), %RDX_LP
++	jae	L(nt_memset)
+ 	movzbl	%sil, %eax
+ 	mov	%RDX_LP, %RCX_LP
+ 	mov	%RDI_LP, %RDX_LP
+ 	rep	stosb
++# if (defined USE_WITH_SSE2) || (defined USE_WITH_AVX512)
++	/* Use xchg to save 1-byte (this helps align targets below).  */
++	xchg	%RDX_LP, %RAX_LP
++# else
+ 	mov	%RDX_LP, %RAX_LP
+-	VZEROUPPER_RETURN
+ # endif
++	VZEROUPPER_RETURN
++#endif
++#ifndef USE_LESS_VEC_MASK_STORE
+ 	/* Define L(less_vec) only if not otherwise defined.  */
+-	.p2align 4
++	.p2align 4,, 12
+ L(less_vec):
+ 	/* Broadcast esi to partial register (i.e VEC_SIZE == 32 broadcast to
+ 	   xmm). This is only does anything for AVX2.  */
+@@ -423,4 +425,35 @@ L(between_2_3):
+ 	movb	%SET_REG8, -1(%LESS_VEC_REG, %rdx)
+ #endif
+ 	ret
+-END (MEMSET_SYMBOL (__memset, unaligned_erms))
++
++#if defined USE_MULTIARCH && IS_IN (libc)
++# ifdef USE_WITH_AVX512
++	/* Force align so the loop doesn't cross a cache-line.  */
++	.p2align 4
++# endif
++	.p2align 4,, 7
++    /* Memset using non-temporal stores.  */
++L(nt_memset):
++	VMOVU	%VMM(0), (VEC_SIZE * 0)(%rdi)
++	leaq	(VEC_SIZE * -4)(%rdi, %rdx), %rdx
++    /* Align DST.  */
++	orq	$(VEC_SIZE * 1 - 1), %rdi
++	incq	%rdi
++	.p2align 4,, 7
++L(nt_loop):
++	VMOVNT	%VMM(0), (VEC_SIZE * 0)(%rdi)
++	VMOVNT	%VMM(0), (VEC_SIZE * 1)(%rdi)
++	VMOVNT	%VMM(0), (VEC_SIZE * 2)(%rdi)
++	VMOVNT	%VMM(0), (VEC_SIZE * 3)(%rdi)
++	subq	$(VEC_SIZE * -4), %rdi
++	cmpq	%rdx, %rdi
++	jb	L(nt_loop)
++	sfence
++	VMOVU	%VMM(0), (VEC_SIZE * 0)(%rdx)
++	VMOVU	%VMM(0), (VEC_SIZE * 1)(%rdx)
++	VMOVU	%VMM(0), (VEC_SIZE * 2)(%rdx)
++	VMOVU	%VMM(0), (VEC_SIZE * 3)(%rdx)
++	VZEROUPPER_RETURN
++#endif
++
++END(MEMSET_SYMBOL(__memset, unaligned_erms))
diff --git a/glibc-upstream-2.39-145.patch b/glibc-upstream-2.39-145.patch
new file mode 100644
index 0000000..1248613
--- /dev/null
+++ b/glibc-upstream-2.39-145.patch
@@ -0,0 +1,143 @@
+commit 994b129a35ca5218ecddd1add74aea68f1314560
+Author: Noah Goldstein <goldstein.w.n@gmail.com>
+Date:   Fri Sep 27 15:50:10 2024 -0700
+
+    x86/string: Fixup alignment of main loop in str{n}cmp-evex [BZ #32212]
+    
+    The loop should be aligned to 32-bytes so that it can ideally run out
+    the DSB. This is particularly important on Skylake-Server where
+    deficiencies in its DSB implementation make it prone to not being
+    able to run loops out of the DSB.
+    
+    For example running strcmp-evex on 200Mb string:
+    
+    32-byte aligned loop:
+        - 43,399,578,766      idq.dsb_uops
+    not 32-byte aligned loop:
+        - 6,060,139,704       idq.dsb_uops
+    
+    This results in a 25% performance degradation for the non-aligned
+    version.
+    
+    The fix is to just ensure the code layout is such that the loop is
+    aligned. (Which was previously the case but was accidentally dropped
+    in 84e7c46df).
+    
+    NB: The fix was actually 64-byte alignment. This is because 64-byte
+    alignment generally produces more stable performance than 32-byte
+    aligned code (cache line crosses can affect perf), so if we are going
+    past 16-byte alignment, might as well go to 64. 64-byte alignment
+    also matches most other functions we over-align, so it creates a
+    common point of optimization.
+    
+    Times are reported as ratio of Time_With_Patch /
+    Time_Without_Patch. Lower is better.
+    
+    The values being reported is the geometric mean of the ratio across
+    all tests in bench-strcmp and bench-strncmp.
+    
+    Note this patch is only attempting to improve the Skylake-Server
+    strcmp for long strings. The rest of the numbers are only to test for
+    regressions.
+    
+    Tigerlake Results Strings <= 512:
+        strcmp : 1.026
+        strncmp: 0.949
+    
+    Tigerlake Results Strings > 512:
+        strcmp : 0.994
+        strncmp: 0.998
+    
+    Skylake-Server Results Strings <= 512:
+        strcmp : 0.945
+        strncmp: 0.943
+    
+    Skylake-Server Results Strings > 512:
+        strcmp : 0.778
+        strncmp: 1.000
+    
+    The 2.6% regression on TGL-strcmp is due to slowdowns caused by
+    changes in alignment of code handling small sizes (most on the
+    page-cross logic). These should be safe to ignore because 1) We
+    previously only 16-byte aligned the function so this behavior is not
+    new and was essentially up to chance before this patch and 2) this
+    type of alignment related regression on small sizes really only comes
+    up in tight micro-benchmark loops and is unlikely to have any effect
+    on realworld performance.
+    
+    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
+    (cherry picked from commit 483443d3211532903d7e790211af5a1d55fdb1f3)
+
+diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
+index 06730ab2a18f72a0..cea034f394ab45e2 100644
+--- a/sysdeps/x86_64/multiarch/strcmp-evex.S
++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
+@@ -209,7 +209,9 @@
+    returned.  */
+ 
+ 	.section SECTION(.text), "ax", @progbits
+-	.align	16
++	/* Align 64 bytes here. This is to get the L(loop) block ideally
++	   aligned for the DSB.  */
++	.align	64
+ 	.type	STRCMP, @function
+ 	.globl	STRCMP
+ # ifdef USE_AS_STRCASECMP_L
+@@ -509,9 +511,7 @@ L(ret4):
+ 	ret
+ # endif
+ 
+-	/* 32 byte align here ensures the main loop is ideally aligned
+-	   for DSB.  */
+-	.p2align 5
++	.p2align 4,, 4
+ L(more_3x_vec):
+ 	/* Safe to compare 4x vectors.  */
+ 	VMOVU	(VEC_SIZE)(%rdi), %VMM(0)
+@@ -1426,10 +1426,9 @@ L(less_32_till_page):
+ L(ret_zero_page_cross_slow_case0):
+ 	xorl	%eax, %eax
+ 	ret
+-# endif
+-
+-
++# else
+ 	.p2align 4,, 10
++# endif
+ L(less_16_till_page):
+ 	cmpl	$((VEC_SIZE - 8) / SIZE_OF_CHAR), %eax
+ 	ja	L(less_8_till_page)
+@@ -1482,8 +1481,12 @@ L(less_16_till_page):
+ # endif
+ 	jmp	L(prepare_loop_aligned)
+ 
+-
+-
++# ifndef USE_AS_STRNCMP
++	/* Fits in aligning bytes.  */
++L(ret_zero_4_loop):
++	xorl	%eax, %eax
++	ret
++# endif
+ 
+ 	.p2align 4,, 10
+ L(less_8_till_page):
+@@ -1554,6 +1557,7 @@ L(ret_less_8_wcs):
+ 
+ #  ifdef USE_AS_STRNCMP
+ 	.p2align 4,, 2
++L(ret_zero_4_loop):
+ L(ret_zero_page_cross_slow_case1):
+ 	xorl	%eax, %eax
+ 	ret
+@@ -1586,10 +1590,6 @@ L(less_4_loop):
+ 	subq	$-(CHAR_PER_VEC * 4), %rdx
+ #  endif
+ 	jmp	L(prepare_loop_aligned)
+-
+-L(ret_zero_4_loop):
+-	xorl	%eax, %eax
+-	ret
+ L(ret_less_4_loop):
+ 	xorl	%r8d, %eax
+ 	subl	%r8d, %eax
diff --git a/glibc-upstream-2.39-146.patch b/glibc-upstream-2.39-146.patch
new file mode 100644
index 0000000..9d16b94
--- /dev/null
+++ b/glibc-upstream-2.39-146.patch
@@ -0,0 +1,57 @@
+commit 808a84a8b81468b517a4d721fdc62069cb8c211f
+Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
+Date:   Tue Jan 21 16:11:06 2025 -0500
+
+    Fix underallocation of abort_msg_s struct (CVE-2025-0395)
+    
+    Include the space needed to store the length of the message itself, in
+    addition to the message string.  This resolves BZ #32582.
+    
+    Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
+    Reviewed: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+    (cherry picked from commit 68ee0f704cb81e9ad0a78c644a83e1e9cd2ee578)
+
+diff --git a/assert/assert.c b/assert/assert.c
+index c29629f5f68921a0..b6e37d694cf4b779 100644
+--- a/assert/assert.c
++++ b/assert/assert.c
+@@ -18,6 +18,7 @@
+ #include <assert.h>
+ #include <atomic.h>
+ #include <ldsodefs.h>
++#include <libc-pointer-arith.h>
+ #include <libintl.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+@@ -65,7 +66,8 @@ __assert_fail_base (const char *fmt, const char *assertion, const char *file,
+       (void) __fxprintf (NULL, "%s", str);
+       (void) fflush (stderr);
+ 
+-      total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1);
++      total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1,
++			GLRO(dl_pagesize));
+       struct abort_msg_s *buf = __mmap (NULL, total, PROT_READ | PROT_WRITE,
+ 					MAP_ANON | MAP_PRIVATE, -1, 0);
+       if (__glibc_likely (buf != MAP_FAILED))
+diff --git a/sysdeps/posix/libc_fatal.c b/sysdeps/posix/libc_fatal.c
+index f9e3425e04496a26..089c47b04b8af049 100644
+--- a/sysdeps/posix/libc_fatal.c
++++ b/sysdeps/posix/libc_fatal.c
+@@ -20,6 +20,7 @@
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <ldsodefs.h>
++#include <libc-pointer-arith.h>
+ #include <paths.h>
+ #include <stdarg.h>
+ #include <stdbool.h>
+@@ -105,7 +106,8 @@ __libc_message_impl (const char *fmt, ...)
+     {
+       WRITEV_FOR_FATAL (fd, iov, iovcnt, total);
+ 
+-      total = (total + 1 + GLRO(dl_pagesize) - 1) & ~(GLRO(dl_pagesize) - 1);
++      total = ALIGN_UP (total + sizeof (struct abort_msg_s) + 1,
++			GLRO(dl_pagesize));
+       struct abort_msg_s *buf = __mmap (NULL, total,
+ 					PROT_READ | PROT_WRITE,
+ 					MAP_ANON | MAP_PRIVATE, -1, 0);
diff --git a/glibc.spec b/glibc.spec
index 66c4e1f..ef89355 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -145,7 +145,7 @@ Version: %{glibcversion}
 # - It allows using the Release number without the %%dist tag in the dependency
 #   generator to make the generated requires interchangeable between Rawhide
 #   and ELN (.elnYY < .fcXX).
-%global baserelease 33
+%global baserelease 34
 Release: %{baserelease}%{?dist}
 
 # Licenses:
@@ -495,6 +495,15 @@ Patch177: glibc-RHEL-71530-7.patch
 Patch178: glibc-RHEL-71530-8.patch
 Patch179: glibc-RHEL-71530-9.patch
 Patch180: glibc-RHEL-71530-10.patch
+Patch181: glibc-upstream-2.39-138.patch
+Patch182: glibc-upstream-2.39-139.patch
+Patch183: glibc-upstream-2.39-140.patch
+Patch184: glibc-upstream-2.39-141.patch
+Patch185: glibc-upstream-2.39-142.patch
+Patch186: glibc-upstream-2.39-143.patch
+Patch187: glibc-upstream-2.39-144.patch
+Patch188: glibc-upstream-2.39-145.patch
+Patch189: glibc-upstream-2.39-146.patch
 
 ##############################################################################
 # Continued list of core "glibc" package information:
@@ -2490,6 +2499,19 @@ update_gconv_modules_cache ()
 %endif
 
 %changelog
+* Thu Jan 23 2025 Florian Weimer <fweimer@redhat.com> - 2.39-34
+- Sync with upstream branch release/2.39/master,
+  commit 808a84a8b81468b517a4d721fdc62069cb8c211f:
+- Fix underallocation of abort_msg_s struct (CVE-2025-0395)
+- x86/string: Fixup alignment of main loop in str{n}cmp-evex [BZ #32212]
+- x86: Improve large memset perf with non-temporal stores [RHEL-29312]
+- x86: Avoid integer truncation with large cache sizes (bug 32470)
+- math: Exclude internal math symbols for tests [BZ #32414]
+- malloc: add indirection for malloc(-like) functions in tests [BZ #32366]
+- Pass -nostdlib -nostartfiles together with -r [BZ #31753]
+- nptl: initialize cpu_id_start prior to rseq registration
+- nptl: initialize rseq area prior to registration
+
 * Mon Dec 23 2024 Florian Weimer <fweimer@redhat.com> - 2.39-33
 - Support in-place file conversion in the iconv tool (RHEL-71530)