Sync with upstream branch release/2.39/master (RHEL-126766)

Relevant commits already backported; skipped from this sync:

- elf: handle addition overflow in _dl_find_object_update_1 [BZ #32245]
    (glibc-RHEL-119398.patch)
- Avoid uninitialized result in sem_open when file does not exist
    (glibc-RHEL-119392-1.patch)
- Rename new tst-sem17 test to tst-sem18
    (glibc-RHEL-119392-2.patch)
- nss: Group merge does not react to ERANGE during merge (bug 33361)
    (glibc-RHEL-114265.patch)
- AArch64: Fix instability in AdvSIMD tan
    (glibc-RHEL-118273-44.patch)

RPM-Changelog: - Sync with upstream branch release/2.39/master (RHEL-126766)
 - Upstream commit: ce65d944e38a20cb70af2a48a4b8aa5d8fabe1cc
 - posix: Reset wordexp_t fields with WRDE_REUSE (CVE-2025-15281 / BZ 33814)
 - resolv: Fix NSS DNS backend for getnetbyaddr (CVE-2026-0915)
 - memalign: reinstate alignment overflow check (CVE-2026-0861)
 - support: Exit on consistency check failure in resolv_response_add_name
 - support: Fix FILE * leak in check_for_unshare_hints in test-container
 - sprof: fix -Wformat warnings on 32-bit hosts
 - sprof: check pread size and offset for overflow
 - getaddrinfo.c: Avoid uninitialized pointer access [BZ #32465]
 - nptl: Optimize trylock for high cache contention workloads (BZ #33704)
 - ppc64le: Power 10 rawmemchr clobbers v20 (bug #33091)
 - ppc64le: Restore optimized strncmp for power10
 - ppc64le: Restore optimized strcmp for power10
 - AArch64: Optimise SVE scalar callbacks
 - aarch64: fix includes in SME tests
 - aarch64: fix cfi directives around __libc_arm_za_disable
 - aarch64: tests for SME
 - aarch64: clear ZA state of SME before clone and clone3 syscalls
 - aarch64: define macro for calling __libc_arm_za_disable
 - aarch64: update tests for SME
 - aarch64: Disable ZA state of SME in setjmp and sigsetjmp
 - linux: Also check pkey_get for ENOSYS on tst-pkey (BZ 31996)
 - aarch64: Do not link conform tests with -Wl,-z,force-bti (bug 33601)
 - x86: fix wmemset ifunc stray '!' (bug 33542)
 - x86: Detect Intel Nova Lake Processor
 - x86: Detect Intel Wildcat Lake Processor
Resolves: RHEL-126766
Resolves: RHEL-45143
Resolves: RHEL-45145
Resolves: RHEL-142786
Resolves: RHEL-141852
Resolves: RHEL-141733
This commit is contained in:
Arjun Shankar 2026-01-22 11:25:08 +01:00
parent 020ea20969
commit b333c27787
25 changed files with 3259 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
commit 835b1e337904a282125300ec08f09b2b3dd3358e
Author: Sunil K Pandey <sunil.k.pandey@intel.com>
Date: Mon Oct 6 18:13:04 2025 -0700
x86: Detect Intel Wildcat Lake Processor
Detect Intel Wildcat Lake Processor and tune it similar to Intel Panther
Lake. https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit f8dd52901b72805a831d5a4cb7d971e4a3c9970b)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4dc2b60aab65ed8e..c00502328fbcb839 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -546,6 +546,7 @@ enum intel_microarch
INTEL_BIGCORE_PANTHERLAKE,
INTEL_BIGCORE_GRANITERAPIDS,
INTEL_BIGCORE_DIAMONDRAPIDS,
+ INTEL_BIGCORE_WILDCATLAKE,
/* Mixed (bigcore + atom SOC). */
INTEL_MIXED_LAKEFIELD,
@@ -705,6 +706,8 @@ intel_get_fam6_microarch (unsigned int model,
return INTEL_BIGCORE_ARROWLAKE;
case 0xCC:
return INTEL_BIGCORE_PANTHERLAKE;
+ case 0xD5:
+ return INTEL_BIGCORE_WILDCATLAKE;
case 0xAD:
case 0xAE:
return INTEL_BIGCORE_GRANITERAPIDS;
@@ -937,6 +940,7 @@ disable_tsx:
case INTEL_BIGCORE_LUNARLAKE:
case INTEL_BIGCORE_ARROWLAKE:
case INTEL_BIGCORE_PANTHERLAKE:
+ case INTEL_BIGCORE_WILDCATLAKE:
case INTEL_BIGCORE_SAPPHIRERAPIDS:
case INTEL_BIGCORE_EMERALDRAPIDS:
case INTEL_BIGCORE_GRANITERAPIDS:

View File

@@ -0,0 +1,50 @@
commit 58cbbd43fe82910cf8ae9008351b0b0665104500
Author: Sunil K Pandey <sunil.k.pandey@intel.com>
Date: Wed Sep 24 09:38:17 2025 -0700
x86: Detect Intel Nova Lake Processor
Detect Intel Nova Lake Processor and tune it similar to Intel Panther
Lake. https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit a114e29ddd530962d2b44aa9d89f1f6075abe7fa)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index c00502328fbcb839..f87d6d354924f3d9 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -547,6 +547,7 @@ enum intel_microarch
INTEL_BIGCORE_GRANITERAPIDS,
INTEL_BIGCORE_DIAMONDRAPIDS,
INTEL_BIGCORE_WILDCATLAKE,
+ INTEL_BIGCORE_NOVALAKE,
/* Mixed (bigcore + atom SOC). */
INTEL_MIXED_LAKEFIELD,
@@ -824,6 +825,17 @@ disable_tsx:
break;
}
}
+ else if (family == 18)
+ switch (model)
+ {
+ case 0x01:
+ case 0x03:
+ microarch = INTEL_BIGCORE_NOVALAKE;
+ break;
+
+ default:
+ break;
+ }
else if (family == 19)
switch (model)
{
@@ -941,6 +953,7 @@ disable_tsx:
case INTEL_BIGCORE_ARROWLAKE:
case INTEL_BIGCORE_PANTHERLAKE:
case INTEL_BIGCORE_WILDCATLAKE:
+ case INTEL_BIGCORE_NOVALAKE:
case INTEL_BIGCORE_SAPPHIRERAPIDS:
case INTEL_BIGCORE_EMERALDRAPIDS:
case INTEL_BIGCORE_GRANITERAPIDS:

View File

@@ -0,0 +1,47 @@
commit 323ad087a1b275bbc88cf824b873681ec667da0f
Author: Jiamei Xie <xiejiamei@hygon.cn>
Date: Tue Oct 14 20:14:11 2025 +0800
x86: fix wmemset ifunc stray '!' (bug 33542)
The ifunc selector for wmemset had a stray '!' in the
X86_ISA_CPU_FEATURES_ARCH_P(...) check:
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, !))
This effectively negated the predicate and caused the AVX2/AVX512
paths to be skipped, making the dispatcher fall back to the SSE2
implementation even on CPUs where AVX2/AVX512 are available. The
regression leads to noticeable throughput loss for wmemset.
Remove the stray '!' so the AVX_Fast_Unaligned_Load capability is
tested as intended and the correct AVX2/EVEX variants are selected.
Impact:
- On AVX2/AVX512-capable x86_64, wmemset no longer incorrectly
falls back to SSE2; perf now shows __wmemset_evex/avx2 variants.
Testing:
- benchtests/bench-wmemset shows improved bandwidth across sizes.
- perf confirm the selected symbol is no longer SSE2.
Signed-off-by: xiejiamei <xiejiamei@hygon.com>
Signed-off-by: Li jing <lijing@hygon.cn>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 4d86b6cdd8132e0410347e07262239750f86dfb4)
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
index 6e20d9dd5a22a52e..727b69f95b0ebad0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -35,7 +35,7 @@ IFUNC_SELECTOR (void)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
- AVX_Fast_Unaligned_Load, !))
+ AVX_Fast_Unaligned_Load,))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
{

View File

@@ -0,0 +1,36 @@
commit c74d59a656f8f98ee48b501a0d0653f4d9b3b59d
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Nov 6 11:14:53 2025 +0100
aarch64: Do not link conform tests with -Wl,-z,force-bti (bug 33601)
If the toolchain does not default to generate BTI markers in GCC,
the main program for conform runtime tests will not have the
BTI marker that -Wl,-z,force-bti requires. Without -Wl,-z,force-bti,
the link editor will not tell the dynamic linker to enable BTI,
and the missing BTI marker is harmless.
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
(cherry picked from commit 75b6b263e928eaca01d836f6bb8b539346b6bb2d)
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 141d7d9cc2bbbce7..eb34f4541e9710ba 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -2,11 +2,15 @@ long-double-fcts = yes
ifeq (yes,$(aarch64-bti))
# Mark linker output BTI compatible, it warns on non-BTI inputs.
+# Do not do this for conform tests because they may not be compiled
+# with the appropriate compiler flags.
+ifneq ($(subdir),conform)
sysdep-LDFLAGS += -Wl,-z,force-bti
# Make warnings fatal outside the test system.
LDFLAGS-lib.so += -Wl,--fatal-warnings
LDFLAGS-rtld += -Wl,-z,force-bti,--fatal-warnings
-endif
+endif # $(subdir) != conform
+endif # $(aarch64-bit)
ifeq ($(subdir),elf)
sysdep-dl-routines += dl-bti

View File

@@ -0,0 +1,36 @@
commit 38942a336bfb29c0f174b8a627fc84be092692db
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Fri Jul 19 11:23:15 2024 -0300
linux: Also check pkey_get for ENOSYS on tst-pkey (BZ 31996)
The powerpc pkey_get/pkey_set support was only added for 64-bit [1],
and tst-pkey only checks if the support was present with pkey_alloc
(which does not fail on powerpc32, at least running a 64-bit kernel).
Checked on powerpc-linux-gnu.
[1] https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=a803367bab167f5ec4fde1f0d0ec447707c29520
Reviewed-By: Andreas K. Huettel <dilfridge@gentoo.org>
(cherry picked from commit 6b7e2e1d6139b1fb61b911ab897a956042bf7f89)
diff --git a/sysdeps/unix/sysv/linux/tst-pkey.c b/sysdeps/unix/sysv/linux/tst-pkey.c
index d9083daab9874a96..46f55666dad2733b 100644
--- a/sysdeps/unix/sysv/linux/tst-pkey.c
+++ b/sysdeps/unix/sysv/linux/tst-pkey.c
@@ -205,7 +205,13 @@ do_test (void)
" protection keys");
FAIL_EXIT1 ("pkey_alloc: %m");
}
- TEST_COMPARE (pkey_get (keys[0]), 0);
+ if (pkey_get (keys[0]) < 0)
+ {
+ if (errno == ENOSYS)
+ FAIL_UNSUPPORTED
+ ("glibc does not support memory protection keys");
+ FAIL_EXIT1 ("pkey_alloc: %m");
+ }
for (int i = 1; i < key_count; ++i)
{
keys[i] = pkey_alloc (0, i);

View File

@@ -0,0 +1,48 @@
commit 1b3bd9a9a6913c5a17cd2aca9b897406405a95d2
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Mon Apr 28 15:12:04 2025 +0100
aarch64: Disable ZA state of SME in setjmp and sigsetjmp
Due to the nature of the ZA state, setjmp() should clear it in the
same manner as it is already done by longjmp.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit a7f6fd976c17b82dc198290b4ab7087f35855a0e)
(cherry picked from commit 1f57ffdf35334ab245544cbb88f3abf2e6c77c54)
(cherry picked from commit cdc1665bb188319b5f16ee05c04de7f2ba580e27)
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index 92dc34e3e9a2650c..b2addd44ed7604b7 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -35,6 +35,29 @@ libc_hidden_def (_setjmp)
ENTRY (__sigsetjmp)
1:
+
+#if IS_IN(libc)
+ /* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */
+# if HAVE_AARCH64_PAC_RET
+ PACIASP
+ cfi_window_save
+# endif
+ stp x29, x30, [sp, -16]!
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+ mov x29, sp
+ bl __libc_arm_za_disable
+ ldp x29, x30, [sp], 16
+ cfi_adjust_cfa_offset (-16)
+ cfi_restore (x29)
+ cfi_restore (x30)
+# if HAVE_AARCH64_PAC_RET
+ AUTIASP
+ cfi_window_save
+# endif
+#endif
+
stp x19, x20, [x0, #JB_X19<<3]
stp x21, x22, [x0, #JB_X21<<3]
stp x23, x24, [x0, #JB_X23<<3]

View File

@@ -0,0 +1,361 @@
commit 58cf4aa42100f869a2cd9dda0604d0a03d252ad6
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Tue Apr 29 13:44:11 2025 +0100
aarch64: update tests for SME
Add test that checks that ZA state is disabled after setjmp and sigsetjmp
Update existing SME test that uses setjmp
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit 251f93262483b9c1184f5b72993d77a5d1c95f68)
(cherry picked from commit 97076e0cf14c635ef1d4ce1241e41c2c497533c8)
(cherry picked from commit 51bcc73d95051e74512bb32fe17096b3db329cf3)
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index eb34f4541e9710ba..62ae97fab07d54db 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -74,7 +74,9 @@ sysdep_routines += \
__arm_za_disable
tests += \
- tst-sme-jmp
+ tst-sme-jmp \
+ tst-sme-za-state \
+ # tests
endif
ifeq ($(subdir),malloc)
diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h
new file mode 100644
index 0000000000000000..f049416c2bd6d377
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-helper.h
@@ -0,0 +1,97 @@
+/* Utility functions for SME tests.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Streaming SVE vector register size. */
+static unsigned long svl;
+
+struct blk {
+ void *za_save_buffer;
+ uint16_t num_za_save_slices;
+ char __reserved[6];
+};
+
+/* Read SVCR to get SM (bit0) and ZA (bit1) state. */
+static unsigned long
+get_svcr (void)
+{
+ register unsigned long x0 asm ("x0");
+ asm volatile (
+ ".inst 0xd53b4240 /* mrs x0, svcr */\n"
+ : "=r" (x0));
+ return x0;
+}
+
+/* Returns tpidr2. */
+static void *
+get_tpidr2 (void)
+{
+ register unsigned long x0 asm ("x0");
+ asm volatile (
+ ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n"
+ : "=r"(x0) :: "memory");
+ return (void *) x0;
+}
+
+/* Obtains current streaming SVE vector register size. */
+static unsigned long
+get_svl (void)
+{
+ register unsigned long x0 asm ("x0");
+ asm volatile (
+ ".inst 0x04bf5820 /* rdsvl x0, 1 */\n"
+ : "=r" (x0));
+ return x0;
+}
+
+/* PSTATE.ZA = 1, set ZA state to active. */
+static void
+start_za (void)
+{
+ asm volatile (
+ ".inst 0xd503457f /* smstart za */");
+}
+
+/* Load data into ZA byte by byte from p. */
+static void __attribute__ ((noinline))
+load_za (const void *p)
+{
+ register unsigned long x15 asm ("x15") = 0;
+ register unsigned long x16 asm ("x16") = (unsigned long)p;
+ register unsigned long x17 asm ("x17") = svl;
+
+ asm volatile (
+ ".inst 0xd503437f /* smstart sm */\n"
+ ".L_ldr_loop:\n"
+ ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n"
+ "add w15, w15, 1\n"
+ ".inst 0x04305030 /* addvl x16, x16, 1 */\n"
+ "cmp w15, w17\n"
+ "bne .L_ldr_loop\n"
+ ".inst 0xd503427f /* smstop sm */\n"
+ : "+r"(x15), "+r"(x16), "+r"(x17));
+}
+
+/* Set tpidr2 to BLK. */
+static void
+set_tpidr2 (struct blk *blk)
+{
+ register unsigned long x0 asm ("x0") = (unsigned long)blk;
+ asm volatile (
+ ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n"
+ :: "r"(x0) : "memory");
+}
diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c
index 35769340e700e6a5..b9638c6a71f22cd2 100644
--- a/sysdeps/aarch64/tst-sme-jmp.c
+++ b/sysdeps/aarch64/tst-sme-jmp.c
@@ -27,87 +27,12 @@
#include <support/support.h>
#include <support/test-driver.h>
-struct blk {
- void *za_save_buffer;
- uint16_t num_za_save_slices;
- char __reserved[6];
-};
+#include "tst-sme-helper.h"
-static unsigned long svl;
static uint8_t *za_orig;
static uint8_t *za_dump;
static uint8_t *za_save;
-static unsigned long
-get_svl (void)
-{
- register unsigned long x0 asm ("x0");
- asm volatile (
- ".inst 0x04bf5820 /* rdsvl x0, 1 */\n"
- : "=r" (x0));
- return x0;
-}
-
-/* PSTATE.ZA = 1, set ZA state to active. */
-static void
-start_za (void)
-{
- asm volatile (
- ".inst 0xd503457f /* smstart za */");
-}
-
-/* Read SVCR to get SM (bit0) and ZA (bit1) state. */
-static unsigned long
-get_svcr (void)
-{
- register unsigned long x0 asm ("x0");
- asm volatile (
- ".inst 0xd53b4240 /* mrs x0, svcr */\n"
- : "=r" (x0));
- return x0;
-}
-
-/* Load data into ZA byte by byte from p. */
-static void __attribute__ ((noinline))
-load_za (const void *p)
-{
- register unsigned long x15 asm ("x15") = 0;
- register unsigned long x16 asm ("x16") = (unsigned long)p;
- register unsigned long x17 asm ("x17") = svl;
-
- asm volatile (
- ".inst 0xd503437f /* smstart sm */\n"
- ".L_ldr_loop:\n"
- ".inst 0xe1006200 /* ldr za[w15, 0], [x16] */\n"
- "add w15, w15, 1\n"
- ".inst 0x04305030 /* addvl x16, x16, 1 */\n"
- "cmp w15, w17\n"
- "bne .L_ldr_loop\n"
- ".inst 0xd503427f /* smstop sm */\n"
- : "+r"(x15), "+r"(x16), "+r"(x17));
-}
-
-/* Set tpidr2 to BLK. */
-static void
-set_tpidr2 (struct blk *blk)
-{
- register unsigned long x0 asm ("x0") = (unsigned long)blk;
- asm volatile (
- ".inst 0xd51bd0a0 /* msr tpidr2_el0, x0 */\n"
- :: "r"(x0) : "memory");
-}
-
-/* Returns tpidr2. */
-static void *
-get_tpidr2 (void)
-{
- register unsigned long x0 asm ("x0");
- asm volatile (
- ".inst 0xd53bd0a0 /* mrs x0, tpidr2_el0 */\n"
- : "=r"(x0) :: "memory");
- return (void *) x0;
-}
-
static void
print_data(const char *msg, void *p)
{
@@ -168,8 +93,8 @@ longjmp_test (void)
{
p = get_tpidr2 ();
printf ("before longjmp: tp2 = %p\n", p);
- if (p != &blk)
- FAIL_EXIT1 ("tpidr2 is clobbered");
+ if (p != NULL)
+ FAIL_EXIT1 ("tpidr2 has not been reset to null");
do_longjmp (env);
FAIL_EXIT1 ("longjmp returned");
}
diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c
new file mode 100644
index 0000000000000000..63f6eebeb4f2aea7
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-za-state.c
@@ -0,0 +1,119 @@
+/* Test for SME ZA state being cleared on setjmp and longjmp.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+
+#include "tst-sme-helper.h"
+
+static uint8_t *state;
+
+static void
+enable_sme_za_state (struct blk *ptr)
+{
+ set_tpidr2 (ptr);
+ start_za ();
+ load_za (state);
+}
+
+static void
+check_sme_za_state (const char msg[], bool clear)
+{
+ unsigned long svcr = get_svcr ();
+ void *tpidr2 = get_tpidr2 ();
+ printf ("[%s]\n", msg);
+ printf ("svcr = %016lx\n", svcr);
+ printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
+ if (clear)
+ {
+ TEST_VERIFY (svcr == 0);
+ TEST_VERIFY (tpidr2 == NULL);
+ }
+ else
+ {
+ TEST_VERIFY (svcr != 0);
+ TEST_VERIFY (tpidr2 != NULL);
+ }
+}
+
+static void
+run (struct blk *ptr)
+{
+ jmp_buf buf;
+ int ret;
+
+ check_sme_za_state ("initial state", /* Clear. */ true);
+
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (ptr);
+ check_sme_za_state ("before setjmp", /* Clear. */ false);
+
+ if ((ret = setjmp (buf)) == 0)
+ {
+ check_sme_za_state ("after setjmp", /* Clear. */ true);
+
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (ptr);
+ check_sme_za_state ("before longjmp", /* Clear. */ false);
+
+ longjmp (buf, 42);
+
+ /* Unreachable. */
+ TEST_VERIFY (false);
+ __builtin_unreachable ();
+ }
+
+ TEST_COMPARE (ret, 42);
+ check_sme_za_state ("after longjmp", /* Clear. */ true);
+}
+
+static int
+do_test (void)
+{
+ unsigned long hwcap2 = getauxval (AT_HWCAP2);
+ if ((hwcap2 & HWCAP2_SME) == 0)
+ return EXIT_UNSUPPORTED;
+
+ /* Get current streaming SVE vector register size. */
+ svl = get_svl ();
+ printf ("svl: %lu\n", svl);
+ TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
+
+ /* Initialise buffer for ZA state of SME. */
+ state = xmalloc (svl * svl);
+ memset (state, 1, svl * svl);
+ struct blk blk = {
+ .za_save_buffer = state,
+ .num_za_save_slices = svl,
+ .__reserved = {0},
+ };
+
+ run (&blk);
+
+ free (state);
+ return 0;
+}
+
+#include <support/test-driver.c>

View File

@@ -0,0 +1,137 @@
commit dbe1904b7c0da4a2e9a548a8826a46d277ff8cca
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Tue Oct 21 13:48:34 2025 +0100
aarch64: define macro for calling __libc_arm_za_disable
A common sequence of instructions is used in several places
in assembly files, so define it in one place as an assembly
macro.
Note that PAC instructions are not included in the new macro
because they are redundant given how we call the arm_za_disable
function (return address is not saved on stack, so no need to
sign it).
(based on commits 6de12fc9ad56bc19fa6fcbd8ee502f29b5170d47
and c0f0db2d59e0908057205b22b21dd9d626d780c1)
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 1a0ee267147d002d66af29bcf3f5002d19b3c75a)
(cherry picked from commit 7af8db46d2493521cb8e0c13907f601a61236ebf)
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 30b36cb25d921795..312651c9bc3bed41 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -27,24 +27,7 @@ ENTRY (__longjmp)
#if IS_IN(libc)
/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */
-# if HAVE_AARCH64_PAC_RET
- PACIASP
- cfi_window_save
-# endif
- stp x29, x30, [sp, -16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- bl __libc_arm_za_disable
- ldp x29, x30, [sp], 16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-# if HAVE_AARCH64_PAC_RET
- AUTIASP
- cfi_window_save
-# endif
+ CALL_LIBC_ARM_ZA_DISABLE
#endif
cfi_def_cfa (x0, 0)
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index b2addd44ed7604b7..7d069e91efa00563 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -38,24 +38,7 @@ ENTRY (__sigsetjmp)
#if IS_IN(libc)
/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */
-# if HAVE_AARCH64_PAC_RET
- PACIASP
- cfi_window_save
-# endif
- stp x29, x30, [sp, -16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- bl __libc_arm_za_disable
- ldp x29, x30, [sp], 16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-# if HAVE_AARCH64_PAC_RET
- AUTIASP
- cfi_window_save
-# endif
+ CALL_LIBC_ARM_ZA_DISABLE
#endif
stp x19, x20, [x0, #JB_X19<<3]
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index 8c072781cdf98c2b..81ed2b92423a52d0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -48,25 +48,8 @@ ENTRY (__setcontext)
cbz x0, 1f
b C_SYMBOL_NAME (__syscall_error)
1:
- /* Disable ZA of SME. */
-#if HAVE_AARCH64_PAC_RET
- PACIASP
- cfi_window_save
-#endif
- stp x29, x30, [sp, -16]!
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
- mov x29, sp
- bl __libc_arm_za_disable
- ldp x29, x30, [sp], 16
- cfi_adjust_cfa_offset (-16)
- cfi_restore (x29)
- cfi_restore (x30)
-#if HAVE_AARCH64_PAC_RET
- AUTIASP
- cfi_window_save
-#endif
+ /* Clear ZA state of SME. */
+ CALL_LIBC_ARM_ZA_DISABLE
/* Restore the general purpose registers. */
mov x0, x9
cfi_def_cfa (x0, 0)
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index 974b503b2f93511d..2f039015190d7b24 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -150,6 +150,18 @@
mov x8, SYS_ify (syscall_name); \
svc 0
+/* Clear ZA state of SME (ASM version). */
+/* The __libc_arm_za_disable function has special calling convention
+ that allows to call it without stack manipulation and preserving
+ most of the registers. */
+ .macro CALL_LIBC_ARM_ZA_DISABLE
+ mov x13, x30
+ .cfi_register x30, x13
+ bl __libc_arm_za_disable
+ mov x30, x13
+ .cfi_register x13, x30
+ .endm
+
#else /* not __ASSEMBLER__ */
# ifdef __LP64__

View File

@@ -0,0 +1,221 @@
commit d1d0d09e9e5e086d3de9217a9572b634ea74857a
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Thu Sep 25 15:54:36 2025 +0100
aarch64: clear ZA state of SME before clone and clone3 syscalls
This change adds a call to the __arm_za_disable() function immediately
before the SVC instruction inside clone() and clone3() wrappers. It also
adds a macro for inline clone() used in fork() and adds the same call to
the vfork implementation. This sets the ZA state of SME to "off" on return
from these functions (for both the child and the parent).
The __arm_za_disable() function is described in [1] (8.1.3). Note that
the internal Glibc name for this function is __libc_arm_za_disable().
When this change was originally proposed [2,3], it generated a long
discussion where several questions and concerns were raised. Here we
will address these concerns and explain why this change is useful and,
in fact, necessary.
In a nutshell, a C library that conforms to the AAPCS64 spec [1] (pertinent
to this change, mainly, the chapters 6.2 and 6.6), should have a call to the
__arm_za_disable() function in clone() and clone3() wrappers. The following
explains in detail why this is the case.
When we consider using the __arm_za_disable() function inside the clone()
and clone3() libc wrappers, we talk about the C library subroutines clone()
and clone3() rather than the syscalls with similar names. In the current
version of Glibc, clone() is public and clone3() is private, but it being
private is not pertinent to this discussion.
We will begin with stating that this change is NOT a bug fix for something
in the kernel. The requirement to call __arm_za_disable() does NOT come from
the kernel. It also is NOT needed to satisfy a contract between the kernel
and userspace. This is why it is not for the kernel documentation to describe
this requirement. This requirement is instead needed to satisfy a pure userspace
scheme outlined in [1] and to make sure that software that uses Glibc (or any
other C library that has correct handling of SME states (see below)) conforms
to [1] without having to unnecessarily become SME-aware thus losing portability.
To recap (see [1] (6.2)), SME extension defines SME state which is part of
processor state. Part of this SME state is ZA state that is necessary to
manage ZA storage register in the context of the ZA lazy saving scheme [1]
(6.6). This scheme exists because it would be challenging to handle ZA
storage of SME in either callee-saved or caller-saved manner.
There are 3 kinds of ZA state that are defined in terms of the PSTATE.ZA
bit and the TPIDR2_EL0 register (see [1] (6.6.3)):
- "off":       PSTATE.ZA == 0
- "active":    PSTATE.ZA == 1 TPIDR2_EL0 == null
- "dormant":   PSTATE.ZA == 1 TPIDR2_EL0 != null
As [1] (6.7.2) outlines, every subroutine has exactly one SME-interface
depending on the permitted ZA-states on entry and on normal return from
a call to this subroutine. Callers of a subroutine must know and respect
the ZA-interface of the subroutines they are using. Using a subroutine
in a way that is not permitted by its ZA-interface is undefined behaviour.
In particular, clone() and clone3() (the C library functions) have the
ZA-private interface. This means that the permitted ZA-states on entry
are "off" and "dormant" and that the permitted states on return are "off"
or "dormant" (but if and only if it was "dormant" on entry).
This means that both functions in question should correctly handle both
"off" and "dormant" ZA-states on entry. The conforming states on return
are "off" and "dormant" (if inbound state was already "dormant").
This change ensures that the ZA-state on return is always "off". Note,
that, in the context of clone() and clone3(), "on return" means a point
when execution resumes at certain address after transferring from clone()
or clone3(). For the caller (we may refer to it as "parent") this is the
return address in the link register where the RET instruction jumps. For
the "child", this is the target branch address.
So, the "off" state on return is permitted and conformant. Why can't we
retain the "dormant" state? In theory, we can, but we shouldn't, here is
why.
Every subroutine with a private-ZA interface, including clone() and clone3(),
must comply with the lazy saving scheme [1] (6.7.2). This puts additional
responsibility on a subroutine if ZA-state on return is "dormant" because
this state has special meaning. The "caller" (that is the place in code
where execution is transferred to, so this include both "parent" and "child")
may check the ZA-state and use it as per the spec of the "dormant" state that
is outlined in [1] (6.6.6 and 6.6.7).
Conforming to this would require more code inside of clone() and clone3()
which hardly is desirable.
For the return to "parent" this could be achieved in theory, but given that
neither clone() nor clone3() are supposed to be used in the middle of an
SME operation, if wouldn't be useful. For the "return" to "child" this
would be particularly difficult to achieve given the complexity of these
functions and their interfaces. Most importantly, it would be illegal
and somewhat meaningless to allow a "child" to start execution in the
"dormant" ZA-state because the very essence of the "dormant" state implies
that there is a place to return and that there is some outer context that
we are allowed to interact with.
To sum up, calling __arm_za_disable() to ensure the "off" ZA-state when the
execution resumes after a call to clone() or clone3() is correct and also
the most simple way to conform to [1].
Can there be situations when we can avoid calling __arm_za_disable()?
Calling __arm_za_disable() implies certain (sufficiently small) overhead,
so one might rightly ponder avoiding making a call to this function when
we can afford not to. The most trivial cases like this (e.g. when the
calling thread doesn't have access to SME or to the TPIDR2_EL0 register)
are already handled by this function (see [1] (8.1.3 and 8.1.2)). Reasoning
about other possible use cases would require making code inside clone() and
clone3() more complicated and it would defeat the point of trying to make
an optimisation of not calling __arm_za_disable().
Why can't the kernel do this instead?
The handling of SME state by the kernel is described in [4]. In short,
kernel must not impose a specific ZA-interface onto a userspace function.
Interaction with the kernel happens (among other thing) via system calls.
In Glibc many of the system calls (notably, including SYS_clone and
SYS_clone3) are used via wrappers, and the kernel has no control of them
and, moreover, it cannot dictate how these wrappers should behave because
it is simply outside of the kernel's remit.
However, in certain cases, the kernel may ensure that a "child" doesn't
start in an incorrect state. This is what is done by the recent change
included in 6.16 kernel [5]. This is not enough to ensure that code that
uses clone() and clone3() function conforms to [1] when it runs on a
system that provides SME, hence this change.
[1]: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst
[2]: https://inbox.sourceware.org/libc-alpha/20250522114828.2291047-1-yury.khrustalev@arm.com
[3]: https://inbox.sourceware.org/libc-alpha/20250609121407.3316070-1-yury.khrustalev@arm.com
[4]: https://www.kernel.org/doc/html/v6.16/arch/arm64/sme.html
[5]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cde5c32db55740659fca6d56c09b88800d88fd29
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 27effb3d50424fb9634be77a2acd614b0386ff25)
(cherry picked from commit 256030b9842a10b1f22851b1de0c119761417544)
(cherry picked from commit 889ae4bdbb4a6fbf37c2303da8cdae3d18880d9e)
(cherry picked from commit 899ebf35691f01c357fc582b3e88db87accb5ee1)
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index fed19acc2f78351f..585f312a3f7319ed 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -45,6 +45,9 @@ ENTRY(__clone)
and x1, x1, -16
cbz x1, .Lsyscall_error
+ /* Clear ZA state of SME. */
+ CALL_LIBC_ARM_ZA_DISABLE
+
/* Do the system call. */
/* X0:flags, x1:newsp, x2:parenttidptr, x3:newtls, x4:childtid. */
mov x0, x2 /* flags */
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
index 9b00b6b8853e9b8b..ec6874830ae98676 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone3.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
@@ -46,6 +46,9 @@ ENTRY(__clone3)
cbz x10, .Lsyscall_error /* No NULL cl_args pointer. */
cbz x2, .Lsyscall_error /* No NULL function pointer. */
+ /* Clear ZA state of SME. */
+ CALL_LIBC_ARM_ZA_DISABLE
+
/* Do the system call, the kernel expects:
x8: system call number
x0: cl_args
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index 2f039015190d7b24..19e66f77202add1a 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -247,6 +247,31 @@
#undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
#define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
+/* Clear ZA state of SME (C version). */
+/* The __libc_arm_za_disable function has special calling convention
+ that allows to call it without stack manipulation and preserving
+ most of the registers. */
+#define CALL_LIBC_ARM_ZA_DISABLE() \
+({ \
+ unsigned long int __tmp; \
+ asm volatile ( \
+ " mov %0, x30\n" \
+ " .cfi_register x30, %0\n" \
+ " bl __libc_arm_za_disable\n" \
+ " mov x30, %0\n" \
+ " .cfi_register %0, x30\n" \
+ : "=r" (__tmp) \
+ : \
+ : "x14", "x15", "x16", "x17", "x18", "memory" ); \
+})
+
+/* Do clear ZA state of SME before making normal clone syscall. */
+#define INLINE_CLONE_SYSCALL(a0, a1, a2, a3, a4) \
+({ \
+ CALL_LIBC_ARM_ZA_DISABLE (); \
+ INLINE_SYSCALL_CALL (clone, a0, a1, a2, a3, a4); \
+})
+
#endif /* __ASSEMBLER__ */
#endif /* linux/aarch64/sysdep.h */
diff --git a/sysdeps/unix/sysv/linux/aarch64/vfork.S b/sysdeps/unix/sysv/linux/aarch64/vfork.S
index e71e492da339b25a..65fa85ae895b61a1 100644
--- a/sysdeps/unix/sysv/linux/aarch64/vfork.S
+++ b/sysdeps/unix/sysv/linux/aarch64/vfork.S
@@ -27,6 +27,9 @@
ENTRY (__vfork)
+ /* Clear ZA state of SME. */
+ CALL_LIBC_ARM_ZA_DISABLE
+
mov x0, #0x4111 /* CLONE_VM | CLONE_VFORK | SIGCHLD */
mov x1, sp
DO_CALL (clone, 2)

View File

@ -0,0 +1,679 @@
commit d60f15dc890c4dbbdaa227ed88e1247136e29256
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Fri Sep 26 10:03:45 2025 +0100
aarch64: tests for SME
This commit adds tests for the following use cases relevant to handling of
the SME state:
- fork() and vfork()
- clone() and clone3()
- signal handler
While most cases are trivial, the case of clone3() is more complicated since
the clone3() symbol is not public in Glibc.
To avoid having to check all possible ways clone3() may be called via other
public functions (e.g. vfork() or pthread_create()), we put together a test
that links directly with clone3.o. All the existing functions that have calls
to clone3() may not actually use it, in which case the outcome of such tests
would be unexpected. Having a direct call to the clone3() symbol in the test
allows to check precisely what we need to test: that the __arm_za_disable()
function is indeed called and has the desired effect.
Linking to clone3.o also requires linking to __arm_za_disable.o, which in
turn requires the _dl_hwcap2 hidden symbol; we provide this symbol in the
test and initialise it before use.
Co-authored-by: Adhemerval Zanella Netto <adhemerval.zanella@linaro.org>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit ecb0fc2f0f839f36cd2a106283142c9df8ea8214)
(cherry picked from commit 71874f167aa5bb1538ff7e394beaacee28ebe65f)
(cherry picked from commit e4ffcf32b9213352917dcf7dc43adcaa0ff76503)
(cherry picked from commit df3bcda9ac85eb720d27bbbc469cdc898cc9b02b)
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 62ae97fab07d54db..f205884fe19a3a13 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -75,8 +75,18 @@ sysdep_routines += \
tests += \
tst-sme-jmp \
+ tst-sme-signal \
tst-sme-za-state \
# tests
+tests-internal += \
+ tst-sme-clone \
+ tst-sme-clone3 \
+ tst-sme-fork \
+ tst-sme-vfork \
+ # tests-internal
+
+$(objpfx)tst-sme-clone3: $(objpfx)clone3.o $(objpfx)__arm_za_disable.o
+
endif
ifeq ($(subdir),malloc)
diff --git a/sysdeps/aarch64/tst-sme-clone.c b/sysdeps/aarch64/tst-sme-clone.c
new file mode 100644
index 0000000000000000..7106ec7926fb64b8
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-clone.c
@@ -0,0 +1,53 @@
+/* Test that ZA state of SME is cleared in both parent and child
+ when clone() syscall is used.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-sme-skeleton.c"
+
+#include <support/xsched.h>
+
+static int
+fun (void * const arg)
+{
+ printf ("in child: %s\n", (const char *)arg);
+ /* Check that ZA state of SME was disabled in child. */
+ check_sme_za_state ("after clone in child", /* Clear. */ true);
+ return 0;
+}
+
+static char __attribute__((aligned(16)))
+stack[1024 * 1024];
+
+static void
+run (struct blk *ptr)
+{
+ char *syscall_name = (char *)"clone";
+ printf ("in parent: before %s\n", syscall_name);
+
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (ptr);
+ check_sme_za_state ("before clone", /* Clear. */ false);
+
+ pid_t pid = xclone (fun, syscall_name, stack, sizeof (stack),
+ CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD);
+
+ /* Check that ZA state of SME was disabled in parent. */
+ check_sme_za_state ("after clone in parent", /* Clear. */ true);
+
+ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
+}
diff --git a/sysdeps/aarch64/tst-sme-clone3.c b/sysdeps/aarch64/tst-sme-clone3.c
new file mode 100644
index 0000000000000000..402b040cfd69acd9
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-clone3.c
@@ -0,0 +1,84 @@
+/* Test that ZA state of SME is cleared in both parent and child
+ when clone3() syscall is used.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-sme-skeleton.c"
+
+#include <clone3.h>
+
+#include <errno.h>
+#include <sys/wait.h>
+#include <support/xsched.h>
+
+/* Since clone3 is not a public symbol, we link this test explicitly
+ with clone3.o and have to provide this declaration. */
+int __clone3 (struct clone_args *cl_args, size_t size,
+ int (*func)(void *arg), void *arg);
+
+static int
+fun (void * const arg)
+{
+ printf ("in child: %s\n", (const char *)arg);
+ /* Check that ZA state of SME was disabled in child. */
+ check_sme_za_state ("after clone3 in child", /* Clear. */ true);
+ return 0;
+}
+
+static char __attribute__((aligned(16)))
+stack[1024 * 1024];
+
+/* Required by __arm_za_disable.o and provided by the startup code
+ as a hidden symbol. */
+uint64_t _dl_hwcap2;
+
+static void
+run (struct blk *ptr)
+{
+ _dl_hwcap2 = getauxval (AT_HWCAP2);
+
+ char *syscall_name = (char *)"clone3";
+ struct clone_args args = {
+ .flags = CLONE_VM | CLONE_VFORK,
+ .exit_signal = SIGCHLD,
+ .stack = (uintptr_t) stack,
+ .stack_size = sizeof (stack),
+ };
+ printf ("in parent: before %s\n", syscall_name);
+
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (ptr);
+ check_sme_za_state ("before clone3", /* Clear. */ false);
+
+ pid_t pid = __clone3 (&args, sizeof (args), fun, syscall_name);
+
+ /* Check that ZA state of SME was disabled in parent. */
+ check_sme_za_state ("after clone3 in parent", /* Clear. */ true);
+
+ printf ("%s child pid: %d\n", syscall_name, pid);
+
+ xwaitpid (pid, NULL, 0);
+ printf ("in parent: after %s\n", syscall_name);
+}
+
+/* Workaround to simplify linking with clone3.o. */
+void __syscall_error(int code)
+{
+ int err = -code;
+ fprintf (stderr, "syscall error %d (%s)\n", err, strerror (err));
+ exit (err);
+}
diff --git a/sysdeps/aarch64/tst-sme-fork.c b/sysdeps/aarch64/tst-sme-fork.c
new file mode 100644
index 0000000000000000..b003b08884f605fa
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-fork.c
@@ -0,0 +1,43 @@
+/* Test that ZA state of SME is cleared in both parent and child
+ when fork() function is used.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-sme-skeleton.c"
+
+static void
+run (struct blk *blk)
+{
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (blk);
+ check_sme_za_state ("before fork", /* Clear. */ false);
+ fflush (stdout);
+
+ pid_t pid = xfork ();
+
+ if (pid == 0)
+ {
+ /* Check that ZA state of SME was disabled in child. */
+ check_sme_za_state ("after fork in child", /* Clear. */ true);
+ exit (0);
+ }
+
+ /* Check that ZA state of SME was disabled in parent. */
+ check_sme_za_state ("after fork in parent", /* Clear. */ true);
+
+ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
+}
diff --git a/sysdeps/aarch64/tst-sme-helper.h b/sysdeps/aarch64/tst-sme-helper.h
index f049416c2bd6d377..ab9c503e45664c54 100644
--- a/sysdeps/aarch64/tst-sme-helper.h
+++ b/sysdeps/aarch64/tst-sme-helper.h
@@ -16,9 +16,6 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* Streaming SVE vector register size. */
-static unsigned long svl;
-
struct blk {
void *za_save_buffer;
uint16_t num_za_save_slices;
@@ -68,10 +65,10 @@ start_za (void)
/* Load data into ZA byte by byte from p. */
static void __attribute__ ((noinline))
-load_za (const void *p)
+load_za (const void *buf, unsigned long svl)
{
register unsigned long x15 asm ("x15") = 0;
- register unsigned long x16 asm ("x16") = (unsigned long)p;
+ register unsigned long x16 asm ("x16") = (unsigned long)buf;
register unsigned long x17 asm ("x17") = svl;
asm volatile (
diff --git a/sysdeps/aarch64/tst-sme-jmp.c b/sysdeps/aarch64/tst-sme-jmp.c
index b9638c6a71f22cd2..0c1d1a4ff8669958 100644
--- a/sysdeps/aarch64/tst-sme-jmp.c
+++ b/sysdeps/aarch64/tst-sme-jmp.c
@@ -29,6 +29,9 @@
#include "tst-sme-helper.h"
+/* Streaming SVE vector register size. */
+static unsigned long svl;
+
static uint8_t *za_orig;
static uint8_t *za_dump;
static uint8_t *za_save;
@@ -82,7 +85,7 @@ longjmp_test (void)
FAIL_EXIT1 ("svcr != 0: %lu", svcr);
set_tpidr2 (&blk);
start_za ();
- load_za (za_orig);
+ load_za (za_orig, svl);
print_data ("za save space", za_save);
p = get_tpidr2 ();
@@ -131,7 +134,7 @@ setcontext_test (void)
FAIL_EXIT1 ("svcr != 0: %lu", svcr);
set_tpidr2 (&blk);
start_za ();
- load_za (za_orig);
+ load_za (za_orig, svl);
print_data ("za save space", za_save);
p = get_tpidr2 ();
diff --git a/sysdeps/aarch64/tst-sme-signal.c b/sysdeps/aarch64/tst-sme-signal.c
new file mode 100644
index 0000000000000000..b4b07bcc4424cd15
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-signal.c
@@ -0,0 +1,115 @@
+/* Test handling of SME state in a signal handler.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-sme-skeleton.c"
+
+#include <support/xsignal.h>
+
+static struct _aarch64_ctx *
+extension (void *p)
+{
+ return p;
+}
+
+#ifndef TPIDR2_MAGIC
+#define TPIDR2_MAGIC 0x54504902
+#endif
+
+#ifndef ZA_MAGIC
+#define ZA_MAGIC 0x54366345
+#endif
+
+#ifndef ZT_MAGIC
+#define ZT_MAGIC 0x5a544e01
+#endif
+
+#ifndef EXTRA_MAGIC
+#define EXTRA_MAGIC 0x45585401
+#endif
+
+/* We use a pipe to make sure that the final check of the SME state
+ happens after signal handler finished. */
+static int pipefd[2];
+
+#define WRITE(msg) xwrite (1, msg, sizeof (msg));
+
+static void
+handler (int signo, siginfo_t *si, void *ctx)
+{
+ TEST_VERIFY (signo == SIGUSR1);
+ WRITE ("in the handler\n");
+ check_sme_za_state ("during signal", true /* State is clear. */);
+ ucontext_t *uc = ctx;
+ void *p = uc->uc_mcontext.__reserved;
+ unsigned int found = 0;
+ uint32_t m;
+ while ((m = extension (p)->magic))
+ {
+ if (m == TPIDR2_MAGIC)
+ {
+ WRITE ("found TPIDR2_MAGIC\n");
+ found += 1;
+ }
+ if (m == ZA_MAGIC)
+ {
+ WRITE ("found ZA_MAGIC\n");
+ found += 1;
+ }
+ if (m == ZT_MAGIC)
+ {
+ WRITE ("found ZT_MAGIC\n");
+ found += 1;
+ }
+ if (m == EXTRA_MAGIC)
+ {
+ WRITE ("found EXTRA_MAGIC\n");
+ struct { struct _aarch64_ctx h; uint64_t data; } *e = p;
+ p = (char *)e->data;
+ continue;
+ }
+ p = (char *)p + extension (p)->size;
+ }
+ TEST_COMPARE (found, 3);
+
+ /* Signal that the wait is over (see below). */
+ char message = '\0';
+ xwrite (pipefd[1], &message, 1);
+}
+
+static void
+run (struct blk *blk)
+{
+ xpipe (pipefd);
+
+ struct sigaction sigact;
+ sigemptyset (&sigact.sa_mask);
+ sigact.sa_flags = 0;
+ sigact.sa_flags |= SA_SIGINFO;
+ sigact.sa_sigaction = handler;
+ xsigaction (SIGUSR1, &sigact, NULL);
+
+ enable_sme_za_state (blk);
+ check_sme_za_state ("before signal", false /* State is not clear. */);
+ xraise (SIGUSR1);
+
+ /* Wait for signal handler to complete. */
+ char response;
+ xread (pipefd[0], &response, 1);
+
+ check_sme_za_state ("after signal", false /* State is not clear. */);
+}
diff --git a/sysdeps/aarch64/tst-sme-skeleton.c b/sysdeps/aarch64/tst-sme-skeleton.c
new file mode 100644
index 0000000000000000..ba84dda1cbd8d523
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-skeleton.c
@@ -0,0 +1,101 @@
+/* Template for SME tests.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xstdlib.h>
+#include <support/xunistd.h>
+#include <support/test-driver.h>
+
+#include "tst-sme-helper.h"
+
+/* Streaming SVE vector register size. */
+static unsigned long svl;
+
+static uint8_t *state;
+
+static void
+enable_sme_za_state (struct blk *blk)
+{
+ start_za ();
+ set_tpidr2 (blk);
+ load_za (blk, svl);
+}
+
+/* Check if SME state is disabled (when CLEAR is true) or
+ enabled (when CLEAR is false). */
+static void
+check_sme_za_state (const char msg[], bool clear)
+{
+ unsigned long svcr = get_svcr ();
+ void *tpidr2 = get_tpidr2 ();
+ printf ("[%s]\n", msg);
+ printf ("svcr = %016lx\n", svcr);
+ printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
+ if (clear)
+ {
+ TEST_VERIFY (svcr == 0);
+ TEST_VERIFY (tpidr2 == NULL);
+ }
+ else
+ {
+ TEST_VERIFY (svcr != 0);
+ TEST_VERIFY (tpidr2 != NULL);
+ }
+}
+
+/* Should be defined in actual test that includes this
+ skeleton file. */
+static void
+run (struct blk *ptr);
+
+static int
+do_test (void)
+{
+ unsigned long hwcap2 = getauxval (AT_HWCAP2);
+ if ((hwcap2 & HWCAP2_SME) == 0)
+ return EXIT_UNSUPPORTED;
+
+ /* Get current streaming SVE vector length in bytes. */
+ svl = get_svl ();
+ printf ("svl: %lu\n", svl);
+
+ TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
+
+ /* Initialise buffer for ZA state of SME. */
+ state = xmalloc (svl * svl);
+ memset (state, 1, svl * svl);
+ struct blk blk = {
+ .za_save_buffer = state,
+ .num_za_save_slices = svl,
+ .__reserved = {0},
+ };
+
+ run (&blk);
+
+ free (state);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-sme-vfork.c b/sysdeps/aarch64/tst-sme-vfork.c
new file mode 100644
index 0000000000000000..3feea065e53625e1
--- /dev/null
+++ b/sysdeps/aarch64/tst-sme-vfork.c
@@ -0,0 +1,43 @@
+/* Test that ZA state of SME is cleared in both parent and child
+ when vfork() function is used.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-sme-skeleton.c"
+
+static void
+run (struct blk *blk)
+{
+ /* Enabled ZA state so that effect of disabling be observable. */
+ enable_sme_za_state (blk);
+ check_sme_za_state ("before vfork", /* Clear. */ false);
+ fflush (stdout);
+
+ pid_t pid = vfork ();
+
+ if (pid == 0)
+ {
+ /* Check that ZA state of SME was disabled in child. */
+ check_sme_za_state ("after vfork in child", /* Clear. */ true);
+ _exit (0);
+ }
+
+ /* Check that ZA state of SME was disabled in parent. */
+ check_sme_za_state ("after vfork in parent", /* Clear. */ true);
+
+ TEST_VERIFY (xwaitpid (pid, NULL, 0) == pid);
+}
diff --git a/sysdeps/aarch64/tst-sme-za-state.c b/sysdeps/aarch64/tst-sme-za-state.c
index 63f6eebeb4f2aea7..00118ef506683ca0 100644
--- a/sysdeps/aarch64/tst-sme-za-state.c
+++ b/sysdeps/aarch64/tst-sme-za-state.c
@@ -16,47 +16,9 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <stdio.h>
-#include <setjmp.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/auxv.h>
-
-#include <support/check.h>
-#include <support/support.h>
-#include <support/test-driver.h>
-
-#include "tst-sme-helper.h"
-
-static uint8_t *state;
-
-static void
-enable_sme_za_state (struct blk *ptr)
-{
- set_tpidr2 (ptr);
- start_za ();
- load_za (state);
-}
+#include "tst-sme-skeleton.c"
-static void
-check_sme_za_state (const char msg[], bool clear)
-{
- unsigned long svcr = get_svcr ();
- void *tpidr2 = get_tpidr2 ();
- printf ("[%s]\n", msg);
- printf ("svcr = %016lx\n", svcr);
- printf ("tpidr2 = %016lx\n", (unsigned long)tpidr2);
- if (clear)
- {
- TEST_VERIFY (svcr == 0);
- TEST_VERIFY (tpidr2 == NULL);
- }
- else
- {
- TEST_VERIFY (svcr != 0);
- TEST_VERIFY (tpidr2 != NULL);
- }
-}
+#include <setjmp.h>
static void
run (struct blk *ptr)
@@ -88,32 +50,3 @@ run (struct blk *ptr)
TEST_COMPARE (ret, 42);
check_sme_za_state ("after longjmp", /* Clear. */ true);
}
-
-static int
-do_test (void)
-{
- unsigned long hwcap2 = getauxval (AT_HWCAP2);
- if ((hwcap2 & HWCAP2_SME) == 0)
- return EXIT_UNSUPPORTED;
-
- /* Get current streaming SVE vector register size. */
- svl = get_svl ();
- printf ("svl: %lu\n", svl);
- TEST_VERIFY_EXIT (!(svl < 16 || svl % 16 != 0 || svl >= (1 << 16)));
-
- /* Initialise buffer for ZA state of SME. */
- state = xmalloc (svl * svl);
- memset (state, 1, svl * svl);
- struct blk blk = {
- .za_save_buffer = state,
- .num_za_save_slices = svl,
- .__reserved = {0},
- };
-
- run (&blk);
-
- free (state);
- return 0;
-}
-
-#include <support/test-driver.c>

View File

@ -0,0 +1,50 @@
commit c1dc4412f887ef1e3c1dbe95d9afb8f666860d3f
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Tue Oct 28 11:01:50 2025 +0000
aarch64: fix cfi directives around __libc_arm_za_disable
Incorrect CFI directive corrupted call stack information
and prevented debuggers from correctly displaying call
stack information.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
(cherry picked from commit 2f77aec043f61e8533487850b11941a640ae2dea)
(cherry picked from commit de1fe81f471496366580ad728b8986a3424b2fd7)
(cherry picked from commit 5bf8ee7ad559fd60bedd3f5ec831d0b12b5000b8)
(cherry picked from commit 1c0ad5ea63b2e5d39eb55f734b0e2bea7f766523)
diff --git a/sysdeps/unix/sysv/linux/aarch64/sysdep.h b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
index 19e66f77202add1a..4e70101b7cadd5fe 100644
--- a/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/aarch64/sysdep.h
@@ -155,11 +155,12 @@
that allows to call it without stack manipulation and preserving
most of the registers. */
.macro CALL_LIBC_ARM_ZA_DISABLE
+ cfi_remember_state
mov x13, x30
- .cfi_register x30, x13
+ cfi_register(x30, x13)
bl __libc_arm_za_disable
mov x30, x13
- .cfi_register x13, x30
+ cfi_restore_state
.endm
#else /* not __ASSEMBLER__ */
@@ -255,11 +256,12 @@
({ \
unsigned long int __tmp; \
asm volatile ( \
+ " .cfi_remember_state\n" \
" mov %0, x30\n" \
- " .cfi_register x30, %0\n" \
+ " .cfi_register x30, %0\n" \
" bl __libc_arm_za_disable\n" \
" mov x30, %0\n" \
- " .cfi_register %0, x30\n" \
+ " .cfi_restore_state\n" \
: "=r" (__tmp) \
: \
: "x14", "x15", "x16", "x17", "x18", "memory" ); \

View File

@ -0,0 +1,39 @@
commit 0d05a895f10ddec0d0ff63715f5744fc70471c41
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Tue Nov 11 11:40:25 2025 +0000
aarch64: fix includes in SME tests
Use the correct include for the SIGCHLD macro: signal.h
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit a9c426bcca59a9e228c4fbe75e75154217ec4ada)
(cherry picked from commit 17c3eab387c3ceb6972e57888a89b1480793f81a)
(cherry picked from commit 215e9155ea06064342151d05446ae51da16e0f65)
(cherry picked from commit a66680adf3b2266c177f94f3f63e4b182e6362fe)
diff --git a/sysdeps/aarch64/tst-sme-clone.c b/sysdeps/aarch64/tst-sme-clone.c
index 7106ec7926fb64b8..b6ad54fa37213b0c 100644
--- a/sysdeps/aarch64/tst-sme-clone.c
+++ b/sysdeps/aarch64/tst-sme-clone.c
@@ -19,6 +19,7 @@
#include "tst-sme-skeleton.c"
+#include <signal.h>
#include <support/xsched.h>
static int
diff --git a/sysdeps/aarch64/tst-sme-clone3.c b/sysdeps/aarch64/tst-sme-clone3.c
index 402b040cfd69acd9..f420d5984d17fe74 100644
--- a/sysdeps/aarch64/tst-sme-clone3.c
+++ b/sysdeps/aarch64/tst-sme-clone3.c
@@ -22,7 +22,7 @@
#include <clone3.h>
#include <errno.h>
-#include <sys/wait.h>
+#include <signal.h>
#include <support/xsched.h>
/* Since clone3 is not a public symbol, we link this test explicitly

View File

@ -0,0 +1,161 @@
commit 03d03933436508e6441dde5b9bbee079deb55b17
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu Nov 6 15:36:03 2025 +0000
AArch64: Optimise SVE scalar callbacks
Instead of using SVE instructions to marshall special results into the
correct lane, just write the entire vector (and the predicate) to
memory, then use cheaper scalar operations.
Geomean speedup of 16% in special intervals on Neoverse with GCC 14.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit 5b82fb18827e962af9f080fdf3c1a69802783f67)
diff --git a/sysdeps/aarch64/fpu/sv_math.h b/sysdeps/aarch64/fpu/sv_math.h
index 41a201392918118e..c2dae543a19464dd 100644
--- a/sysdeps/aarch64/fpu/sv_math.h
+++ b/sysdeps/aarch64/fpu/sv_math.h
@@ -24,11 +24,29 @@
#include "vecmath_config.h"
+#if !defined(__ARM_FEATURE_SVE_BITS) || __ARM_FEATURE_SVE_BITS == 0
+/* If not specified by -msve-vector-bits, assume maximum vector length. */
+# define SVE_VECTOR_BYTES 256
+#else
+# define SVE_VECTOR_BYTES (__ARM_FEATURE_SVE_BITS / 8)
+#endif
+#define SVE_NUM_FLTS (SVE_VECTOR_BYTES / sizeof (float))
+#define SVE_NUM_DBLS (SVE_VECTOR_BYTES / sizeof (double))
+/* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes. */
+#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t))
+
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
+static inline void
+svstr_p (uint8_t *dst, svbool_t p)
+{
+ /* Predicate STR does not currently have an intrinsic. */
+ __asm__("str %0, [%x1]\n" : : "Upa"(p), "r"(dst) : "memory");
+}
+
/* Double precision. */
static inline svint64_t
sv_s64 (int64_t x)
@@ -51,33 +69,35 @@ sv_f64 (double x)
static inline svfloat64_t
sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
- svbool_t p = svpfirst (cmp, svpfalse ());
- while (svptest_any (cmp, p))
+ double tmp[SVE_NUM_DBLS];
+ uint8_t pg_bits[SVE_NUM_PG_BYTES];
+ svstr_p (pg_bits, cmp);
+ svst1 (svptrue_b64 (), tmp, svsel (cmp, x, y));
+
+ for (int i = 0; i < svcntd (); i++)
{
- double elem = svclastb_n_f64 (p, 0, x);
- elem = (*f) (elem);
- svfloat64_t y2 = svdup_n_f64 (elem);
- y = svsel_f64 (p, y2, y);
- p = svpnext_b64 (cmp, p);
+ if (pg_bits[i] & 1)
+ tmp[i] = f (tmp[i]);
}
- return y;
+ return svld1 (svptrue_b64 (), tmp);
}
static inline svfloat64_t
sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
svfloat64_t y, svbool_t cmp)
{
- svbool_t p = svpfirst (cmp, svpfalse ());
- while (svptest_any (cmp, p))
+ double tmp1[SVE_NUM_DBLS], tmp2[SVE_NUM_DBLS];
+ uint8_t pg_bits[SVE_NUM_PG_BYTES];
+ svstr_p (pg_bits, cmp);
+ svst1 (svptrue_b64 (), tmp1, svsel (cmp, x1, y));
+ svst1 (cmp, tmp2, x2);
+
+ for (int i = 0; i < svcntd (); i++)
{
- double elem1 = svclastb_n_f64 (p, 0, x1);
- double elem2 = svclastb_n_f64 (p, 0, x2);
- double ret = (*f) (elem1, elem2);
- svfloat64_t y2 = svdup_n_f64 (ret);
- y = svsel_f64 (p, y2, y);
- p = svpnext_b64 (cmp, p);
+ if (pg_bits[i] & 1)
+ tmp1[i] = f (tmp1[i], tmp2[i]);
}
- return y;
+ return svld1 (svptrue_b64 (), tmp1);
}
static inline svuint64_t
@@ -109,33 +129,40 @@ sv_f32 (float x)
static inline svfloat32_t
sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
- svbool_t p = svpfirst (cmp, svpfalse ());
- while (svptest_any (cmp, p))
+ float tmp[SVE_NUM_FLTS];
+ uint8_t pg_bits[SVE_NUM_PG_BYTES];
+ svstr_p (pg_bits, cmp);
+ svst1 (svptrue_b32 (), tmp, svsel (cmp, x, y));
+
+ for (int i = 0; i < svcntd (); i++)
{
- float elem = svclastb_n_f32 (p, 0, x);
- elem = f (elem);
- svfloat32_t y2 = svdup_n_f32 (elem);
- y = svsel_f32 (p, y2, y);
- p = svpnext_b32 (cmp, p);
+ uint8_t p = pg_bits[i];
+ if (p & 1)
+ tmp[i * 2] = f (tmp[i * 2]);
+ if (p & (1 << 4))
+ tmp[i * 2 + 1] = f (tmp[i * 2 + 1]);
}
- return y;
+ return svld1 (svptrue_b32 (), tmp);
}
static inline svfloat32_t
sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
svfloat32_t y, svbool_t cmp)
{
- svbool_t p = svpfirst (cmp, svpfalse ());
- while (svptest_any (cmp, p))
+ float tmp1[SVE_NUM_FLTS], tmp2[SVE_NUM_FLTS];
+ uint8_t pg_bits[SVE_NUM_PG_BYTES];
+ svstr_p (pg_bits, cmp);
+ svst1 (svptrue_b32 (), tmp1, svsel (cmp, x1, y));
+ svst1 (cmp, tmp2, x2);
+
+ for (int i = 0; i < svcntd (); i++)
{
- float elem1 = svclastb_n_f32 (p, 0, x1);
- float elem2 = svclastb_n_f32 (p, 0, x2);
- float ret = f (elem1, elem2);
- svfloat32_t y2 = svdup_n_f32 (ret);
- y = svsel_f32 (p, y2, y);
- p = svpnext_b32 (cmp, p);
+ uint8_t p = pg_bits[i];
+ if (p & 1)
+ tmp1[i * 2] = f (tmp1[i * 2], tmp2[i * 2]);
+ if (p & (1 << 4))
+ tmp1[i * 2 + 1] = f (tmp1[i * 2 + 1], tmp2[i * 2 + 1]);
}
- return y;
+ return svld1 (svptrue_b32 (), tmp1);
}
-
#endif

View File

@ -0,0 +1,291 @@
commit 0daa4e46b870f13a0837c9d3bbe4ab3029a5c26f
Author: Sachin Monga <smonga@linux.ibm.com>
Date: Fri Nov 21 01:55:45 2025 -0500
ppc64le: Restore optimized strcmp for power10
This patch addresses the actual cause of CVE-2025-5702
The vector non-volatile registers are no longer used for the 32-byte
load and comparison operations.
Additionally, the assembler workaround used earlier for the lxvp
instruction is replaced with the actual instruction.
Signed-off-by: Sachin Monga <smonga@linux.ibm.com>
Co-authored-by: Paul Murphy <paumurph@redhat.com>
(cherry picked from commit 9a40b1cda519cc4f532acb6d020390829df3d81b)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strcmp.S b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
new file mode 100644
index 0000000000000000..0d4a53317c889045
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power10/strcmp.S
@@ -0,0 +1,185 @@
+/* Optimized strcmp implementation for PowerPC64/POWER10.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+#include <sysdep.h>
+
+#ifndef STRCMP
+# define STRCMP strcmp
+#endif
+
+/* Implements the function
+ int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]). */
+
+
+#define COMPARE_16(vreg1,vreg2,offset) \
+ lxv vreg1+32,offset(r3); \
+ lxv vreg2+32,offset(r4); \
+ vcmpnezb. v7,vreg1,vreg2; \
+ bne cr6,L(different); \
+
+#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
+ lxvp vreg1+32,offset(r3); \
+ lxvp vreg2+32,offset(r4); \
+ vcmpnezb. v7,vreg1+1,vreg2+1; \
+ bne cr6,L(label1); \
+ vcmpnezb. v7,vreg1,vreg2; \
+ bne cr6,L(label2); \
+
+#define TAIL(vreg1,vreg2) \
+ vctzlsbb r6,v7; \
+ vextubrx r5,r6,vreg1; \
+ vextubrx r4,r6,vreg2; \
+ subf r3,r4,r5; \
+ blr; \
+
+#define CHECK_N_BYTES(reg1,reg2,len_reg) \
+ sldi r0,len_reg,56; \
+ lxvl 32+v4,reg1,r0; \
+ lxvl 32+v5,reg2,r0; \
+ add reg1,reg1,len_reg; \
+ add reg2,reg2,len_reg; \
+ vcmpnezb. v7,v4,v5; \
+ vctzlsbb r6,v7; \
+ cmpld cr7,r6,len_reg; \
+ blt cr7,L(different); \
+
+
+ .machine power10
+ENTRY_TOCLESS (STRCMP, 4)
+ li r11,16
+ /* eq bit of cr1 used as swap status flag to indicate if
+ source pointers were swapped. */
+ crclr 4*cr1+eq
+ andi. r7,r3,15
+ sub r7,r11,r7 /* r7(nalign1) = 16 - (str1 & 15). */
+ andi. r9,r4,15
+ sub r5,r11,r9 /* r5(nalign2) = 16 - (str2 & 15). */
+ cmpld cr7,r7,r5
+ beq cr7,L(same_aligned)
+ blt cr7,L(nalign1_min)
+ /* Swap r3 and r4, and r7 and r5 such that r3 and r7 hold the
+ pointer which is closer to the next 16B boundary so that only
+ one CHECK_N_BYTES is needed before entering the loop below. */
+ mr r8,r4
+ mr r4,r3
+ mr r3,r8
+ mr r12,r7
+ mr r7,r5
+ mr r5,r12
+ crset 4*cr1+eq /* Set bit on swapping source pointers. */
+
+ .p2align 5
+L(nalign1_min):
+ CHECK_N_BYTES(r3,r4,r7)
+
+ .p2align 5
+L(s1_aligned):
+ /* r9 and r5 is number of bytes to be read after and before
+ page boundary correspondingly. */
+ sub r5,r5,r7
+ subfic r9,r5,16
+ /* Now let r7 hold the count of quadwords which can be
+ checked without crossing a page boundary. quadword offset is
+ (str2>>4)&0xFF. */
+ rlwinm r7,r4,28,0xFF
+ /* Below check is required only for first iteration. For second
+ iteration and beyond, the new loop counter is always 255. */
+ cmpldi r7,255
+ beq L(L3)
+ /* Get the initial loop count by 255-((str2>>4)&0xFF). */
+ subfic r11,r7,255
+
+ .p2align 5
+L(L1):
+ mtctr r11
+
+ .p2align 5
+L(L2):
+ COMPARE_16(v4,v5,0) /* Load 16B blocks using lxv. */
+ addi r3,r3,16
+ addi r4,r4,16
+ bdnz L(L2)
+ /* Cross the page boundary of s2, carefully. */
+
+ .p2align 5
+L(L3):
+ CHECK_N_BYTES(r3,r4,r5)
+ CHECK_N_BYTES(r3,r4,r9)
+ li r11,255 /* Load the new loop counter. */
+ b L(L1)
+
+ .p2align 5
+L(same_aligned):
+ CHECK_N_BYTES(r3,r4,r7)
+ /* Align s1 to 32B and adjust s2 address.
+ Use lxvp only if both s1 and s2 are 32B aligned. */
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ COMPARE_16(v4,v5,48)
+ addi r3,r3,64
+ addi r4,r4,64
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+
+ clrldi r6,r3,59
+ subfic r5,r6,32
+ add r3,r3,r5
+ add r4,r4,r5
+ andi. r5,r4,0x1F
+ beq cr0,L(32B_aligned_loop)
+
+ .p2align 5
+L(16B_aligned_loop):
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ COMPARE_16(v4,v5,48)
+ addi r3,r3,64
+ addi r4,r4,64
+ b L(16B_aligned_loop)
+
+ /* Calculate and return the difference. */
+L(different):
+ vctzlsbb r6,v7
+ vextubrx r5,r6,v4
+ vextubrx r4,r6,v5
+ bt 4*cr1+eq,L(swapped)
+ subf r3,r4,r5
+ blr
+
+ /* If src pointers were swapped, then swap the
+ indices and calculate the return value. */
+L(swapped):
+ subf r3,r5,r4
+ blr
+
+ .p2align 5
+L(32B_aligned_loop):
+ COMPARE_32(v14,v16,0,tail1,tail2)
+ COMPARE_32(v14,v16,32,tail1,tail2)
+ COMPARE_32(v14,v16,64,tail1,tail2)
+ COMPARE_32(v14,v16,96,tail1,tail2)
+ addi r3,r3,128
+ addi r4,r4,128
+ b L(32B_aligned_loop)
+
+L(tail1): TAIL(v15,v17)
+L(tail2): TAIL(v14,v16)
+
+END (STRCMP)
+libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 27d8495503a5a1fe..d7824a922b0de470 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -33,7 +33,8 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
ifneq (,$(filter %le,$(config-machine)))
sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
rawmemchr-power9 rawmemchr-power10 \
- strcmp-power9 strncmp-power9 strcpy-power9 stpcpy-power9 \
+ strcmp-power9 strcmp-power10 strncmp-power9 \
+ strcpy-power9 stpcpy-power9 \
strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
endif
CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index ad6080f1991f4080..e2f733eb82fa6199 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -377,6 +377,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/strcmp.c. */
IFUNC_IMPL (i, name, strcmp,
#ifdef __LITTLE_ENDIAN__
+ IFUNC_IMPL_ADD (array, i, strcmp,
+ (hwcap2 & PPC_FEATURE2_ARCH_3_1)
+ && (hwcap & PPC_FEATURE_HAS_VSX),
+ __strcmp_power10)
IFUNC_IMPL_ADD (array, i, strcmp,
hwcap2 & PPC_FEATURE2_ARCH_3_00
&& hwcap & PPC_FEATURE_HAS_ALTIVEC,
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
new file mode 100644
index 0000000000000000..a4ee7fb53ccf5e01
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp-power10.S
@@ -0,0 +1,26 @@
+/* Optimized strcmp implementation for POWER10/PPC64.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
+#define STRCMP __strcmp_power10
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/le/power10/strcmp.S>
+#endif /* __LITTLE_ENDIAN__ && IS_IN (libc) */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
index 06b9b4090ff23ee1..ff32496fabba2e47 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcmp.c
@@ -29,12 +29,16 @@ extern __typeof (strcmp) __strcmp_power7 attribute_hidden;
extern __typeof (strcmp) __strcmp_power8 attribute_hidden;
# ifdef __LITTLE_ENDIAN__
extern __typeof (strcmp) __strcmp_power9 attribute_hidden;
+extern __typeof (strcmp) __strcmp_power10 attribute_hidden;
# endif
# undef strcmp
libc_ifunc_redirected (__redirect_strcmp, strcmp,
# ifdef __LITTLE_ENDIAN__
+ (hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strcmp_power10 :
(hwcap2 & PPC_FEATURE2_ARCH_3_00
&& hwcap & PPC_FEATURE_HAS_ALTIVEC)
? __strcmp_power9 :

View File

@ -0,0 +1,356 @@
commit f6becd8ae8091846e8cc098cfc4dcacf352d9300
Author: Sachin Monga <smonga@linux.ibm.com>
Date: Fri Nov 21 01:56:13 2025 -0500
ppc64le: Restore optimized strncmp for power10
This patch addresses the actual cause of CVE-2025-5745
The vector non-volatile registers are not used anymore for
32 byte load and comparison operation
Additionally, the assembler workaround used earlier for the
instruction lxvp is replaced with actual instruction.
Signed-off-by: Sachin Monga <smonga@linux.ibm.com>
Co-authored-by: Paul Murphy <paumurph@redhat.com>
(cherry picked from commit 2ea943f7d487d6a4166658b32af7c5365889fc34)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strncmp.S b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
new file mode 100644
index 0000000000000000..6e09fcb7f222d77e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power10/strncmp.S
@@ -0,0 +1,252 @@
+/* Optimized strncmp implementation for PowerPC64/POWER10.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Implements the function
+
+ int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
+
+ The implementation uses unaligned doubleword access to avoid specialized
+ code paths depending of data alignment for first 32 bytes and uses
+ vectorised loops after that. */
+
+#ifndef STRNCMP
+# define STRNCMP strncmp
+#endif
+
+#define COMPARE_16(vreg1,vreg2,offset) \
+ lxv vreg1+32,offset(r3); \
+ lxv vreg2+32,offset(r4); \
+ vcmpnezb. v7,vreg1,vreg2; \
+ bne cr6,L(different); \
+ cmpldi cr7,r5,16; \
+ ble cr7,L(ret0); \
+ addi r5,r5,-16;
+
+#define COMPARE_32(vreg1,vreg2,offset,label1,label2) \
+ lxvp vreg1+32,offset(r3); \
+ lxvp vreg2+32,offset(r4); \
+ vcmpnezb. v7,vreg1+1,vreg2+1; \
+ bne cr6,L(label1); \
+ vcmpnezb. v7,vreg1,vreg2; \
+ bne cr6,L(label2); \
+ cmpldi cr7,r5,32; \
+ ble cr7,L(ret0); \
+ addi r5,r5,-32;
+
+#define TAIL_FIRST_16B(vreg1,vreg2) \
+ vctzlsbb r6,v7; \
+ cmpld cr7,r5,r6; \
+ ble cr7,L(ret0); \
+ vextubrx r5,r6,vreg1; \
+ vextubrx r4,r6,vreg2; \
+ subf r3,r4,r5; \
+ blr;
+
+#define TAIL_SECOND_16B(vreg1,vreg2) \
+ vctzlsbb r6,v7; \
+ addi r0,r6,16; \
+ cmpld cr7,r5,r0; \
+ ble cr7,L(ret0); \
+ vextubrx r5,r6,vreg1; \
+ vextubrx r4,r6,vreg2; \
+ subf r3,r4,r5; \
+ blr;
+
+#define CHECK_N_BYTES(reg1,reg2,len_reg) \
+ sldi r6,len_reg,56; \
+ lxvl 32+v4,reg1,r6; \
+ lxvl 32+v5,reg2,r6; \
+ add reg1,reg1,len_reg; \
+ add reg2,reg2,len_reg; \
+ vcmpnezb v7,v4,v5; \
+ vctzlsbb r6,v7; \
+ cmpld cr7,r6,len_reg; \
+ blt cr7,L(different); \
+ cmpld cr7,r5,len_reg; \
+ ble cr7,L(ret0); \
+ sub r5,r5,len_reg; \
+
+ .machine power10
+ENTRY_TOCLESS (STRNCMP, 4)
+ /* Check if size is 0. */
+ cmpdi cr0,r5,0
+ beq cr0,L(ret0)
+ andi. r7,r3,4095
+ andi. r8,r4,4095
+ cmpldi cr0,r7,4096-16
+ cmpldi cr1,r8,4096-16
+ bgt cr0,L(crosses)
+ bgt cr1,L(crosses)
+ COMPARE_16(v4,v5,0)
+ addi r3,r3,16
+ addi r4,r4,16
+
+L(crosses):
+ andi. r7,r3,15
+ subfic r7,r7,16 /* r7(nalign1) = 16 - (str1 & 15). */
+ andi. r9,r4,15
+ subfic r8,r9,16 /* r8(nalign2) = 16 - (str2 & 15). */
+ cmpld cr7,r7,r8
+ beq cr7,L(same_aligned)
+ blt cr7,L(nalign1_min)
+
+ /* nalign2 is minimum and s2 pointer is aligned. */
+ CHECK_N_BYTES(r3,r4,r8)
+ /* Are we on the 64B hunk which crosses a page? */
+ andi. r10,r3,63 /* Determine offset into 64B hunk. */
+ andi. r8,r3,15 /* The offset into the 16B hunk. */
+ neg r7,r3
+ andi. r9,r7,15 /* Number of bytes after a 16B cross. */
+ rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */
+ beq L(compare_64_pagecross)
+ mtctr r7
+ b L(compare_64B_unaligned)
+
+ /* nalign1 is minimum and s1 pointer is aligned. */
+L(nalign1_min):
+ CHECK_N_BYTES(r3,r4,r7)
+ /* Are we on the 64B hunk which crosses a page? */
+ andi. r10,r4,63 /* Determine offset into 64B hunk. */
+ andi. r8,r4,15 /* The offset into the 16B hunk. */
+ neg r7,r4
+ andi. r9,r7,15 /* Number of bytes after a 16B cross. */
+ rlwinm. r7,r7,26,0x3F /* ((r4-4096))>>6&63. */
+ beq L(compare_64_pagecross)
+ mtctr r7
+
+ .p2align 5
+L(compare_64B_unaligned):
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ COMPARE_16(v4,v5,48)
+ addi r3,r3,64
+ addi r4,r4,64
+ bdnz L(compare_64B_unaligned)
+
+ /* Cross the page boundary of s2, carefully. Only for first
+ iteration we have to get the count of 64B blocks to be checked.
+ From second iteration and beyond, loop counter is always 63. */
+L(compare_64_pagecross):
+ li r11, 63
+ mtctr r11
+ cmpldi r10,16
+ ble L(cross_4)
+ cmpldi r10,32
+ ble L(cross_3)
+ cmpldi r10,48
+ ble L(cross_2)
+L(cross_1):
+ CHECK_N_BYTES(r3,r4,r9)
+ CHECK_N_BYTES(r3,r4,r8)
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ addi r3,r3,48
+ addi r4,r4,48
+ b L(compare_64B_unaligned)
+L(cross_2):
+ COMPARE_16(v4,v5,0)
+ addi r3,r3,16
+ addi r4,r4,16
+ CHECK_N_BYTES(r3,r4,r9)
+ CHECK_N_BYTES(r3,r4,r8)
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ addi r3,r3,32
+ addi r4,r4,32
+ b L(compare_64B_unaligned)
+L(cross_3):
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ addi r3,r3,32
+ addi r4,r4,32
+ CHECK_N_BYTES(r3,r4,r9)
+ CHECK_N_BYTES(r3,r4,r8)
+ COMPARE_16(v4,v5,0)
+ addi r3,r3,16
+ addi r4,r4,16
+ b L(compare_64B_unaligned)
+L(cross_4):
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ addi r3,r3,48
+ addi r4,r4,48
+ CHECK_N_BYTES(r3,r4,r9)
+ CHECK_N_BYTES(r3,r4,r8)
+ b L(compare_64B_unaligned)
+
+L(same_aligned):
+ CHECK_N_BYTES(r3,r4,r7)
+ /* Align s1 to 32B and adjust s2 address.
+ Use lxvp only if both s1 and s2 are 32B aligned. */
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ COMPARE_16(v4,v5,48)
+ addi r3,r3,64
+ addi r4,r4,64
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ addi r5,r5,32
+
+ clrldi r6,r3,59
+ subfic r7,r6,32
+ add r3,r3,r7
+ add r4,r4,r7
+ subf r5,r7,r5
+ andi. r7,r4,0x1F
+ beq cr0,L(32B_aligned_loop)
+
+ .p2align 5
+L(16B_aligned_loop):
+ COMPARE_16(v4,v5,0)
+ COMPARE_16(v4,v5,16)
+ COMPARE_16(v4,v5,32)
+ COMPARE_16(v4,v5,48)
+ addi r3,r3,64
+ addi r4,r4,64
+ b L(16B_aligned_loop)
+
+ /* Calculate and return the difference. */
+L(different):
+ TAIL_FIRST_16B(v4,v5)
+
+ .p2align 5
+L(32B_aligned_loop):
+ COMPARE_32(v14,v16,0,tail1,tail2)
+ COMPARE_32(v14,v16,32,tail1,tail2)
+ COMPARE_32(v14,v16,64,tail1,tail2)
+ COMPARE_32(v14,v16,96,tail1,tail2)
+ addi r3,r3,128
+ addi r4,r4,128
+ b L(32B_aligned_loop)
+
+L(tail1): TAIL_FIRST_16B(v15,v17)
+L(tail2): TAIL_SECOND_16B(v14,v16)
+
+ .p2align 5
+L(ret0):
+ li r3,0
+ blr
+
+END(STRNCMP)
+libc_hidden_builtin_def(strncmp)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index d7824a922b0de470..f6d93f255182bc9f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -33,7 +33,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
ifneq (,$(filter %le,$(config-machine)))
sysdep_routines += memcmp-power10 memcpy-power10 memmove-power10 memset-power10 \
rawmemchr-power9 rawmemchr-power10 \
- strcmp-power9 strcmp-power10 strncmp-power9 \
+ strcmp-power9 strcmp-power10 strncmp-power9 strncmp-power10 \
strcpy-power9 stpcpy-power9 \
strlen-power9 strncpy-power9 stpncpy-power9 strlen-power10
endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index e2f733eb82fa6199..99d24ed1ff204b57 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -164,6 +164,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/powerpc/powerpc64/multiarch/strncmp.c. */
IFUNC_IMPL (i, name, strncmp,
#ifdef __LITTLE_ENDIAN__
+ IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX,
+ __strncmp_power10)
IFUNC_IMPL_ADD (array, i, strncmp, hwcap2 & PPC_FEATURE2_ARCH_3_00
&& hwcap & PPC_FEATURE_HAS_ALTIVEC,
__strncmp_power9)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
new file mode 100644
index 0000000000000000..bb25bc75b8e7f43b
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power10.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if defined __LITTLE_ENDIAN__ && IS_IN (libc)
+#define STRNCMP __strncmp_power10
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/le/power10/strncmp.S>
+#endif
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index 6178f4a432bb33a4..a5ed67f766317aa3 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -29,6 +29,7 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
# ifdef __LITTLE_ENDIAN__
extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
+extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
# endif
# undef strncmp
@@ -36,6 +37,9 @@ extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
ifunc symbol properly. */
libc_ifunc_redirected (__redirect_strncmp, strncmp,
# ifdef __LITTLE_ENDIAN__
+ (hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strncmp_power10 :
(hwcap2 & PPC_FEATURE2_ARCH_3_00
&& hwcap & PPC_FEATURE_HAS_ALTIVEC)
? __strncmp_power9 :

View File

@ -0,0 +1,25 @@
commit efff7cb65907125f0fb46840e8c44f0b5b6ef6f9
Author: Sachin Monga <smonga@linux.ibm.com>
Date: Thu Nov 27 11:10:01 2025 -0500
ppc64le: Power 10 rawmemchr clobbers v20 (bug #33091)
Replace non-volatile(v20) by volatile(v17)
since v20 is not restored
Reviewed-by: Peter Bergner <bergner@tenstorrent.com>
(cherry picked from commit b59799f14f97f697c3a36b4380bd4ce2fbe65f11)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
index 0bd794540f846236..574a24b586d32c56 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
@@ -31,7 +31,7 @@
# define FUNCNAME RAWMEMCHR
# endif
# define MCOUNT_NARGS 2
-# define VREG_ZERO v20
+# define VREG_ZERO v17
# define OFF_START_LOOP 256
# define RAWMEMCHR_SUBTRACT_VECTORS \
vsububm v4,v4,v18; \

View File

@ -0,0 +1,44 @@
commit ae5fb9355918811679fa7ee01f1f41cea280b615
Author: Sunil K Pandey <sunil.k.pandey@intel.com>
Date: Tue Dec 9 08:57:44 2025 -0800
nptl: Optimize trylock for high cache contention workloads (BZ #33704)
Check lock availability before acquisition to reduce cache line
bouncing. Significantly improves trylock throughput on multi-core
systems under heavy contention.
Tested on x86_64.
Fixes BZ #33704.
Co-authored-by: Alex M Wells <alex.m.wells@intel.com>
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
(cherry picked from commit 63716823dbad9482e09972907ae98e9cb00f9b86)
diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c
index 720c103f3f1f92f7..6cf47403dd6a40fe 100644
--- a/nptl/pthread_mutex_trylock.c
+++ b/nptl/pthread_mutex_trylock.c
@@ -48,7 +48,8 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
return 0;
}
- if (lll_trylock (mutex->__data.__lock) == 0)
+ if (atomic_load_relaxed (&(mutex->__data.__lock)) == 0
+ && lll_trylock (mutex->__data.__lock) == 0)
{
/* Record the ownership. */
mutex->__data.__owner = id;
@@ -71,7 +72,10 @@ ___pthread_mutex_trylock (pthread_mutex_t *mutex)
/*FALL THROUGH*/
case PTHREAD_MUTEX_ADAPTIVE_NP:
case PTHREAD_MUTEX_ERRORCHECK_NP:
- if (lll_trylock (mutex->__data.__lock) != 0)
+ /* Mutex type is already loaded, lock check overhead should
+ be minimal. */
+ if (atomic_load_relaxed (&(mutex->__data.__lock)) != 0
+ || lll_trylock (mutex->__data.__lock) != 0)
break;
/* Record the ownership. */

View File

@ -0,0 +1,127 @@
commit c07002038f0f4392e4546a30543272e9a4bbad06
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Tue Dec 17 15:18:36 2024 +0800
getaddrinfo.c: Avoid uninitialized pointer access [BZ #32465]
Add valid_decimal_value to check valid decimal value in a string to
avoid uninitialized endp in add_prefixlist and gaiconf_init as reported
by Clang 19:
./getaddrinfo.c:1884:11: error: variable 'endp' is used uninitialized whenever '||' condition is true [-Werror,-Wsometimes-uninitialized]
1884 | && (cp == NULL
| ^~~~~~~~~~
./getaddrinfo.c:1887:11: note: uninitialized use occurs here
1887 | && *endp == '\0'
| ^~~~
./getaddrinfo.c:1884:11: note: remove the '||' if its condition is always false
1884 | && (cp == NULL
| ^~~~~~~~~~
1885 | || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX
| ~~
./getaddrinfo.c:1875:13: note: initialize the variable 'endp' to silence this warning
1875 | char *endp;
| ^
| = NULL
This fixes BZ #32465.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Sam James <sam@gentoo.org>
(cherry picked from commit 33aeb88c5bc9a0c6b1bd7190a0ead7570972b719)
diff --git a/nss/getaddrinfo.c b/nss/getaddrinfo.c
index 78f50954df1cca36..2add236973538da6 100644
--- a/nss/getaddrinfo.c
+++ b/nss/getaddrinfo.c
@@ -1865,6 +1865,22 @@ scopecmp (const void *p1, const void *p2)
return 1;
}
+/* Return true if PTR points to a valid decimal value string and
+ store the value in *VALUE_P. Otherwise, return false. */
+
+static bool
+valid_decimal_value (const char *str, unsigned long int *value_p)
+{
+ char *endp;
+ unsigned long int value = strtoul (str, &endp, 10);
+ if (str == endp
+ || *endp != '\0'
+ || (value == ULONG_MAX && errno == ERANGE))
+ return false;
+ *value_p = value;
+ return true;
+}
+
static bool
add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp,
char *val1, char *val2, char **pos)
@@ -1872,7 +1888,6 @@ add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp,
struct in6_addr prefix;
unsigned long int bits;
unsigned long int val;
- char *endp;
bits = 128;
__set_errno (0);
@@ -1881,14 +1896,9 @@ add_prefixlist (struct prefixlist **listp, size_t *lenp, bool *nullbitsp,
*cp++ = '\0';
*pos = cp;
if (inet_pton (AF_INET6, val1, &prefix)
- && (cp == NULL
- || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && (cp == NULL || valid_decimal_value (cp, &bits))
&& bits <= 128
- && ((val = strtoul (val2, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && valid_decimal_value (val2, &val)
&& val <= INT_MAX)
{
struct prefixlist *newp = malloc (sizeof (*newp));
@@ -2031,7 +2041,6 @@ gaiconf_init (void)
struct in6_addr prefix;
unsigned long int bits;
unsigned long int val;
- char *endp;
bits = 32;
__set_errno (0);
@@ -2042,15 +2051,10 @@ gaiconf_init (void)
{
bits = 128;
if (IN6_IS_ADDR_V4MAPPED (&prefix)
- && (cp == NULL
- || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && (cp == NULL || valid_decimal_value (cp, &bits))
&& bits >= 96
&& bits <= 128
- && ((val = strtoul (val2, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && valid_decimal_value (val2, &val)
&& val <= INT_MAX)
{
if (!add_scopelist (&scopelist, &nscopelist,
@@ -2064,14 +2068,9 @@ gaiconf_init (void)
}
}
else if (inet_pton (AF_INET, val1, &prefix.s6_addr32[3])
- && (cp == NULL
- || (bits = strtoul (cp, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && (cp == NULL || valid_decimal_value (cp, &bits))
&& bits <= 32
- && ((val = strtoul (val2, &endp, 10)) != ULONG_MAX
- || errno != ERANGE)
- && *endp == '\0'
+ && valid_decimal_value (val2, &val)
&& val <= INT_MAX)
{
if (!add_scopelist (&scopelist, &nscopelist,

View File

@ -0,0 +1,114 @@
commit beb8267909f0cc95e3e8a542e67c1143e38da18f
Author: DJ Delorie <dj@redhat.com>
Date: Wed Oct 15 21:37:56 2025 -0400
sprof: check pread size and offset for overflow
Add a bit of descriptive paranoia to the values we read from
the ELF headers and use to access data.
Reviewed-by: Collin Funk <collin.funk1@gmail.com>
(cherry picked from commit 324084649b2da2f6840e3a1b84159a4e9a9e9a74)
diff --git a/elf/sprof.c b/elf/sprof.c
index b19aca329241a41a..0c687eab491101a2 100644
--- a/elf/sprof.c
+++ b/elf/sprof.c
@@ -38,6 +38,7 @@
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
+#include <intprops.h>
/* Get libc version number. */
#include "../version.h"
@@ -410,6 +411,7 @@ load_shobj (const char *name)
int fd;
ElfW(Shdr) *shdr;
size_t pagesize = getpagesize ();
+ struct stat st;
/* Since we use dlopen() we must be prepared to work around the sometimes
strange lookup rules for the shared objects. If we have a file foo.so
@@ -553,14 +555,39 @@ load_shobj (const char *name)
error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
map->l_name);
+ if (fstat (fd, &st) < 0)
+ error (EXIT_FAILURE, errno, _("stat(%s) failure"), map->l_name);
+
+ /* We're depending on data that's being read from the file, so be a
+ bit paranoid here and make sure the requests are reasonable -
+ i.e. both size and offset are nonnegative and smaller than the
+ file size, as well as the offset of the end of the data. PREAD
+ would have failed anyway, but this is more robust and explains
+ what happened better. Note that SZ must be unsigned and OFF may
+ be signed or unsigned. */
+#define PCHECK(sz1,off1) { \
+ size_t sz = sz1, end_off; \
+ off_t off = off1; \
+ if (sz > st.st_size \
+ || off < 0 || off > st.st_size \
+ || INT_ADD_WRAPV (sz, off, &end_off) \
+ || end_off > st.st_size) \
+ error (EXIT_FAILURE, ERANGE, \
+ _("read outside of file extents %zu + %zd > %zu"), \
+ sz, off, st.st_size); \
+ }
+
/* Map the section header. */
size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr));
shdr = (ElfW(Shdr) *) alloca (size);
+ PCHECK (size, ehdr->e_shoff);
if (pread (fd, shdr, size, ehdr->e_shoff) != size)
error (EXIT_FAILURE, errno, _("reading of section headers failed"));
/* Get the section header string table. */
char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size);
+ PCHECK (shdr[ehdr->e_shstrndx].sh_size,
+ shdr[ehdr->e_shstrndx].sh_offset);
if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size,
shdr[ehdr->e_shstrndx].sh_offset)
!= shdr[ehdr->e_shstrndx].sh_size)
@@ -588,6 +615,7 @@ load_shobj (const char *name)
size_t size = debuglink_entry->sh_size;
char *debuginfo_fname = (char *) alloca (size + 1);
debuginfo_fname[size] = '\0';
+ PCHECK (size, debuglink_entry->sh_offset);
if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset)
!= size)
{
@@ -641,21 +669,32 @@ load_shobj (const char *name)
if (fd2 != -1)
{
ElfW(Ehdr) ehdr2;
+ struct stat st;
+
+ if (fstat (fd2, &st) < 0)
+ error (EXIT_FAILURE, errno, _("stat(%s) failure"), workbuf);
/* Read the ELF header. */
+ PCHECK (sizeof (ehdr2), 0);
if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2))
error (EXIT_FAILURE, errno,
_("reading of ELF header failed"));
/* Map the section header. */
- size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr));
+ size_t size;
+ if (INT_MULTIPLY_WRAPV (ehdr2.e_shnum, sizeof (ElfW(Shdr)), &size))
+ error (EXIT_FAILURE, errno, _("too many section headers"));
+
ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size);
+ PCHECK (size, ehdr2.e_shoff);
if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size)
error (EXIT_FAILURE, errno,
_("reading of section headers failed"));
/* Get the section header string table. */
shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size);
+ PCHECK (shdr2[ehdr2.e_shstrndx].sh_size,
+ shdr2[ehdr2.e_shstrndx].sh_offset);
if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size,
shdr2[ehdr2.e_shstrndx].sh_offset)
!= shdr2[ehdr2.e_shstrndx].sh_size)

View File

@ -0,0 +1,24 @@
commit 4a53354eaf26a6b5c4e1c67f92ea41eae949ac09
Author: Collin Funk <collin.funk1@gmail.com>
Date: Wed Oct 22 01:51:09 2025 -0700
sprof: fix -Wformat warnings on 32-bit hosts
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 9681f645ba20fc3c18eb12ffebf94e3df1f888e3)
diff --git a/elf/sprof.c b/elf/sprof.c
index 0c687eab491101a2..1f5ab25ac39bfff4 100644
--- a/elf/sprof.c
+++ b/elf/sprof.c
@@ -573,8 +573,8 @@ load_shobj (const char *name)
|| INT_ADD_WRAPV (sz, off, &end_off) \
|| end_off > st.st_size) \
error (EXIT_FAILURE, ERANGE, \
- _("read outside of file extents %zu + %zd > %zu"), \
- sz, off, st.st_size); \
+ _("read outside of file extents %zu + %jd > %jd"), \
+ sz, (intmax_t) off, (intmax_t) st.st_size); \
}
/* Map the section header. */

View File

@ -0,0 +1,22 @@
commit f47dd22366d5f66a4ca635f7be700e65c2ab2a81
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Nov 6 14:33:22 2025 +0100
support: Fix FILE * leak in check_for_unshare_hints in test-container
The file opened via fopen is never closed.
(cherry picked from commit 20a2a756089eacd7e7f4c02e381e82b5d0e40a2c)
diff --git a/support/test-container.c b/support/test-container.c
index 2813b0ddf572fc48..a9c9926c21f3a21e 100644
--- a/support/test-container.c
+++ b/support/test-container.c
@@ -703,6 +703,7 @@ check_for_unshare_hints (int require_pidns)
val = -1; /* Sentinel. */
int cnt = fscanf (f, "%d", &val);
+ fclose (f);
if (cnt == 1 && val != files[i].bad_value)
continue;

View File

@ -0,0 +1,27 @@
commit 10c0bcb3d3935f9b79a828502513c2084c90772c
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Nov 6 14:49:21 2025 +0100
support: Exit on consistency check failure in resolv_response_add_name
Using TEST_VERIFY (crname_target != crname) instructs some analysis
tools that crname_target == crname might hold. Under this assumption,
they report a use-after-free for crname_target->offset below, caused
by the previous free (crname).
Reviewed-by: Collin Funk <collin.funk1@gmail.com>
(cherry picked from commit b64335ff111c071fde61aec1c1a8460afb3d16d4)
diff --git a/support/resolv_test.c b/support/resolv_test.c
index f1613bd255c086e1..d4cc26b4aa24ce3a 100644
--- a/support/resolv_test.c
+++ b/support/resolv_test.c
@@ -326,7 +326,7 @@ resolv_response_add_name (struct resolv_response_builder *b,
crname_target = *ptr;
else
crname_target = NULL;
- TEST_VERIFY (crname_target != crname);
+ TEST_VERIFY_EXIT (crname_target != crname);
/* Not added to the tree. */
free (crname);
}

View File

@@ -0,0 +1,82 @@
commit fb22fd3f5b415dd4cd6f7b5741c2f0412374e242
Author: Siddhesh Poyarekar <siddhesh@gotplt.org>
Date: Thu Jan 15 06:06:40 2026 -0500
memalign: reinstate alignment overflow check (CVE-2026-0861)
The change to cap valid sizes to PTRDIFF_MAX inadvertently dropped the
overflow check for alignment in memalign functions, _mid_memalign and
_int_memalign. Reinstate the overflow check in _int_memalign, aligned
with the PTRDIFF_MAX change since that is directly responsible for the
CVE. The missing _mid_memalign check is not relevant (and does not have
a security impact) and may need a different approach to fully resolve,
so it has been omitted.
CVE-Id: CVE-2026-0861
Vulnerable-Commit: 9bf8e29ca136094f73f69f725f15c51facc97206
Reported-by: Igor Morgenstern, Aisle Research
Fixes: BZ #33796
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Signed-off-by: Siddhesh Poyarekar <siddhesh@gotplt.org>
(cherry picked from commit c9188d333717d3ceb7e3020011651f424f749f93)
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 09dc60bf74c59945..e1aacde4ac94f53b 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -5049,7 +5049,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
INTERNAL_SIZE_T size;
nb = checked_request2size (bytes);
- if (nb == 0)
+ if (nb == 0 || alignment > PTRDIFF_MAX)
{
__set_errno (ENOMEM);
return NULL;
@@ -5065,7 +5065,10 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
we don't find anything in those bins, the common malloc code will
scan starting at 2x. */
- /* Call malloc with worst case padding to hit alignment. */
+ /* Call malloc with worst case padding to hit alignment. ALIGNMENT is a
+ power of 2, so it tops out at (PTRDIFF_MAX >> 1) + 1, leaving plenty of
+ space to add MINSIZE and whatever checked_request2size adds to BYTES to
+ get NB. Consequently, total below also does not overflow. */
m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
if (m == NULL)
diff --git a/malloc/tst-malloc-too-large.c b/malloc/tst-malloc-too-large.c
index 2b91377e54cdc485..15b25cf01d482951 100644
--- a/malloc/tst-malloc-too-large.c
+++ b/malloc/tst-malloc-too-large.c
@@ -152,7 +152,6 @@ test_large_allocations (size_t size)
}
-static long pagesize;
/* This function tests the following aligned memory allocation functions
using several valid alignments and precedes each allocation test with a
@@ -171,8 +170,8 @@ test_large_aligned_allocations (size_t size)
/* All aligned memory allocation functions expect an alignment that is a
power of 2. Given this, we test each of them with every valid
- alignment from 1 thru PAGESIZE. */
- for (align = 1; align <= pagesize; align *= 2)
+ alignment for the type of ALIGN, i.e. until it wraps to 0. */
+ for (align = 1; align > 0; align <<= 1)
{
test_setup ();
#if __GNUC_PREREQ (7, 0)
@@ -265,11 +264,6 @@ do_test (void)
DIAG_IGNORE_NEEDS_COMMENT (7, "-Walloc-size-larger-than=");
#endif
- /* Aligned memory allocation functions need to be tested up to alignment
- size equivalent to page size, which should be a power of 2. */
- pagesize = sysconf (_SC_PAGESIZE);
- TEST_VERIFY_EXIT (powerof2 (pagesize));
-
/* Loop 1: Ensure that all allocations with SIZE close to SIZE_MAX, i.e.
in the range (SIZE_MAX - 2^14, SIZE_MAX], fail.

View File

@@ -0,0 +1,71 @@
commit 831f63b94ceb92fb14c0d1a7ddad35a0d1404c71
Author: Carlos O'Donell <carlos@redhat.com>
Date: Thu Jan 15 15:09:38 2026 -0500
resolv: Fix NSS DNS backend for getnetbyaddr (CVE-2026-0915)
The default network value of zero for net was never tested for and
results in a DNS query constructed from uninitialized stack bytes.
The solution is to provide a default query for the case where net
is zero.
Adding a test case for this was straight forward given the existence of
tst-resolv-network and if the test is added without the fix you observe
this failure:
FAIL: resolv/tst-resolv-network
original exit status 1
error: tst-resolv-network.c:174: invalid QNAME: \146\218\129\128
error: 1 test failures
With a random QNAME resulting from the use of uninitialized stack bytes.
After the fix the test passes.
Additionally verified using wireshark before and after to ensure
on-the-wire bytes for the DNS query were as expected.
No regressions on x86_64.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
(cherry picked from commit e56ff82d5034ec66c6a78f517af6faa427f65b0b)
diff --git a/resolv/nss_dns/dns-network.c b/resolv/nss_dns/dns-network.c
index b32fd0fcab4fcc2c..71d67aa6f89722e2 100644
--- a/resolv/nss_dns/dns-network.c
+++ b/resolv/nss_dns/dns-network.c
@@ -207,6 +207,10 @@ _nss_dns_getnetbyaddr_r (uint32_t net, int type, struct netent *result,
sprintf (qbuf, "%u.%u.%u.%u.in-addr.arpa", net_bytes[3], net_bytes[2],
net_bytes[1], net_bytes[0]);
break;
+ default:
+ /* Default network (net is originally zero). */
+ strcpy (qbuf, "0.0.0.0.in-addr.arpa");
+ break;
}
net_buffer.buf = orig_net_buffer = (querybuf *) alloca (1024);
diff --git a/resolv/tst-resolv-network.c b/resolv/tst-resolv-network.c
index f40e6f926c581f0b..8d4f8badf3781603 100644
--- a/resolv/tst-resolv-network.c
+++ b/resolv/tst-resolv-network.c
@@ -46,6 +46,9 @@ handle_code (const struct resolv_response_context *ctx,
{
switch (code)
{
+ case 0:
+ send_ptr (b, qname, qclass, qtype, "0.in-addr.arpa");
+ break;
case 1:
send_ptr (b, qname, qclass, qtype, "1.in-addr.arpa");
break;
@@ -265,6 +268,9 @@ do_test (void)
"error: TRY_AGAIN\n");
/* Lookup by address, success cases. */
+ check_reverse (0,
+ "name: 0.in-addr.arpa\n"
+ "net: 0x00000000\n");
check_reverse (1,
"name: 1.in-addr.arpa\n"
"net: 0x00000001\n");

View File

@@ -0,0 +1,170 @@
commit ce65d944e38a20cb70af2a48a4b8aa5d8fabe1cc
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu Jan 15 10:32:19 2026 -0300
posix: Reset wordexp_t fields with WRDE_REUSE (CVE-2025-15281 / BZ 33814)
The wordexp fails to properly initialize the input wordexp_t when
WRDE_REUSE is used. The wordexp_t struct is properly freed, but
reuses the old wc_wordc value and updates the we_wordv in the
wrong position. A later wordfree will then call free with an
invalid pointer.
Checked on x86_64-linux-gnu and i686-linux-gnu.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 80cc58ea2de214f85b0a1d902a3b668ad2ecb302)
Conflicts:
posix/Makefile
(Fixup context)
diff --git a/posix/Makefile b/posix/Makefile
index 59cd1dab72acbd54..6dc240a5bf9f3046 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -327,6 +327,7 @@ tests := \
tst-wait4 \
tst-waitid \
tst-wordexp-nocmd \
+ tst-wordexp-reuse \
tstgetopt \
# tests
@@ -455,6 +456,8 @@ generated += \
tst-rxspencer-no-utf8.mtrace \
tst-vfork3-mem.out \
tst-vfork3.mtrace \
+ tst-wordexp-reuse-mem.out \
+ tst-wordexp-reuse.mtrace \
# generated
endif
endif
@@ -490,6 +493,7 @@ tests-special += \
$(objpfx)tst-pcre-mem.out \
$(objpfx)tst-rxspencer-no-utf8-mem.out \
$(objpfx)tst-vfork3-mem.out \
+ $(objpfx)tst-wordexp-reuse.out \
# tests-special
endif
endif
@@ -773,3 +777,10 @@ $(objpfx)posix-conf-vars-def.h: $(..)scripts/gen-posix-conf-vars.awk \
$(make-target-directory)
$(AWK) -f $(filter-out Makefile, $^) > $@.tmp
mv -f $@.tmp $@
+
+tst-wordexp-reuse-ENV += MALLOC_TRACE=$(objpfx)tst-wordexp-reuse.mtrace \
+ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
+
+$(objpfx)tst-wordexp-reuse-mem.out: $(objpfx)tst-wordexp-reuse.out
+ $(common-objpfx)malloc/mtrace $(objpfx)tst-wordexp-reuse.mtrace > $@; \
+ $(evaluate-test)
diff --git a/posix/tst-wordexp-reuse.c b/posix/tst-wordexp-reuse.c
new file mode 100644
index 0000000000000000..3926b9f5576750ac
--- /dev/null
+++ b/posix/tst-wordexp-reuse.c
@@ -0,0 +1,89 @@
+/* Test for wordexp with WRDE_REUSE flag.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <wordexp.h>
+#include <mcheck.h>
+
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+ mtrace ();
+
+ {
+ wordexp_t p = { 0 };
+ TEST_COMPARE (wordexp ("one", &p, 0), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[0], "one");
+ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[0], "two");
+ wordfree (&p);
+ }
+
+ {
+ wordexp_t p = { .we_offs = 2 };
+ TEST_COMPARE (wordexp ("one", &p, 0), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[0], "one");
+ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_DOOFFS), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
+ wordfree (&p);
+ }
+
+ {
+ wordexp_t p = { 0 };
+ TEST_COMPARE (wordexp ("one", &p, 0), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[0], "one");
+ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE | WRDE_APPEND), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[0], "two");
+ wordfree (&p);
+ }
+
+ {
+ wordexp_t p = { .we_offs = 2 };
+ TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one");
+ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE
+ | WRDE_DOOFFS), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
+ wordfree (&p);
+ }
+
+ {
+ wordexp_t p = { .we_offs = 2 };
+ TEST_COMPARE (wordexp ("one", &p, WRDE_DOOFFS), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "one");
+ TEST_COMPARE (wordexp ("two", &p, WRDE_REUSE
+ | WRDE_DOOFFS | WRDE_APPEND), 0);
+ TEST_COMPARE (p.we_wordc, 1);
+ TEST_COMPARE_STRING (p.we_wordv[p.we_offs + 0], "two");
+ wordfree (&p);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/posix/wordexp.c b/posix/wordexp.c
index a7362ef31b052800..4cd23645198aed30 100644
--- a/posix/wordexp.c
+++ b/posix/wordexp.c
@@ -2216,7 +2216,9 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags)
{
/* Minimal implementation of WRDE_REUSE for now */
wordfree (pwordexp);
+ old_word.we_wordc = 0;
old_word.we_wordv = NULL;
+ pwordexp->we_wordc = 0;
}
if ((flags & WRDE_APPEND) == 0)