Sync with upstream branch release/2.39/master

Upstream commit: 5d070d12b3a52bc44dd1b71743abc4b6243862ae

- x86: Expand the comment on when REP STOSB is used on memset
- x86: Do not prefer ERMS for memset on Zen3+
- x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
- Add tst-gnu2-tls2mod1 to test-internal-extras
- elf: Enable TLS descriptor tests on aarch64
- arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
- Ignore undefined symbols for -mtls-dialect=gnu2
- x86-64: Allocate state buffer space for RDI, RSI and RBX
- x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
- x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
- x86-64: Save APX registers in ld.so trampoline
- LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
- powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
- powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
This commit is contained in:
Arjun Shankar 2024-04-04 17:10:36 +02:00
parent 1470fe1da7
commit 24af28d49b
15 changed files with 3713 additions and 1 deletions

View File

@ -0,0 +1,16 @@
commit 983f34a1252de3ca6f2305c211d86530ea42010e
Author: caiyinyu <caiyinyu@loongson.cn>
Date: Mon Mar 11 16:07:48 2024 +0800
LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c
index 9f054852362e2d76..7c0395fbb5afbc58 100644
--- a/sysdeps/loongarch/fpu/e_scalbf.c
+++ b/sysdeps/loongarch/fpu/e_scalbf.c
@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn)
return x;
}
-libm_alias_finite (__ieee754_scalb, __scalb)
+libm_alias_finite (__ieee754_scalbf, __scalbf)

View File

@ -0,0 +1,80 @@
commit 7fc8242bf87828c935ac5df5cafb9dc7ab635fd9
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Fri Feb 16 07:17:10 2024 -0800
x86-64: Save APX registers in ld.so trampoline
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
ld.so trampoline. This fixes BZ #31371.
Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
be used by i386 _dl_tlsdesc_dynamic.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
(cherry picked from commit dfb05f8e704edac70db38c4c8ee700769d91a413)
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 85d0a8c943cbb218..837fd28734914a1c 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -21,14 +21,54 @@
#include <sysdeps/generic/sysdep.h>
+/* The extended state feature IDs in the state component bitmap. */
+#define X86_XSTATE_X87_ID 0
+#define X86_XSTATE_SSE_ID 1
+#define X86_XSTATE_AVX_ID 2
+#define X86_XSTATE_BNDREGS_ID 3
+#define X86_XSTATE_BNDCFG_ID 4
+#define X86_XSTATE_K_ID 5
+#define X86_XSTATE_ZMM_H_ID 6
+#define X86_XSTATE_ZMM_ID 7
+#define X86_XSTATE_PKRU_ID 9
+#define X86_XSTATE_TILECFG_ID 17
+#define X86_XSTATE_TILEDATA_ID 18
+#define X86_XSTATE_APX_F_ID 19
+
+#ifdef __x86_64__
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
- aligned to 16 bytes for fxsave and 64 bytes for xsave. */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
-
-/* Save SSE, AVX, AVX512, mask and bound registers. */
-#define STATE_SAVE_MASK \
- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+ aligned to 16 bytes for fxsave and 64 bytes for xsave.
+
+ NB: Is is non-zero because of the 128-byte red-zone. Some registers
+ are saved on stack without adjusting stack pointer first. When we
+ update stack pointer to allocate more space, we need to take the
+ red-zone into account. */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
+ registers are mutually exclusive. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID) \
+ | (1 << X86_XSTATE_ZMM_ID) \
+ | (1 << X86_XSTATE_APX_F_ID))
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
+ doesn't have red-zone, use 0 here. */
+# define STATE_SAVE_OFFSET 0
+
+/* Save SSE, AVX, AXV512, mask and bound registers. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID))
+#endif
/* Constants for bits in __x86_string_control: */

1453
glibc-upstream-2.39-12.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,496 @@
commit 853e915fdd6ae6c5f1a7a68d2594ec8dbfef1286
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Wed Feb 28 12:08:03 2024 -0800
x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
_dl_tlsdesc_dynamic should also preserve AMX registers which are
caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
and save it in xsave_state_full_size which is only used by
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
the AMX part of BZ #31372. Tested on AMX processor.
AMX test is enabled only for compilers with the fix for
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
GCC 14 and GCC 11/12/13 branches have the bug fix.
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
(cherry picked from commit 9b7091415af47082664717210ac49d51551456ab)
diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
index 4223feb95f6fe2f5..9a1e7aa6461725af 100644
--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
$(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
cp $< $@
endif
+
+ifeq (yes,$(have-mamx-tile))
+tests += \
+ tst-gnu2-tls2-amx \
+# tests
+
+modules-names += \
+ tst-gnu2-tls2-amx-mod0 \
+ tst-gnu2-tls2-amx-mod1 \
+ tst-gnu2-tls2-amx-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-amx.out: \
+ $(objpfx)tst-gnu2-tls2-amx-mod0.so \
+ $(objpfx)tst-gnu2-tls2-amx-mod1.so \
+ $(objpfx)tst-gnu2-tls2-amx-mod2.so
+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
+
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+endif
+
endif # $(subdir) == elf
ifneq ($(enable-cet),no)
diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
index 2f511321ad3b3ac1..ef4631bf4b2fd9aa 100644
--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
+++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
@@ -20,3 +20,8 @@
# define ARCH_SHSTK_SHSTK 0x1
# define ARCH_SHSTK_WRSS 0x2
#endif
+
+#ifndef ARCH_GET_XCOMP_PERM
+# define ARCH_GET_XCOMP_PERM 0x1022
+# define ARCH_REQ_XCOMP_PERM 0x1023
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
new file mode 100644
index 0000000000000000..2e0c7b91b7caf3ab
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod0.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
new file mode 100644
index 0000000000000000..b8a8ccf1c119d443
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod1.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
new file mode 100644
index 0000000000000000..cdf4a8f3635b327c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
new file mode 100644
index 0000000000000000..ae4dd82556c9b2ef
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
@@ -0,0 +1,83 @@
+/* Test TLSDESC relocation with AMX.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+#include <asm/prctl.h>
+#include <support/check.h>
+#include "tst-gnu2-tls2-amx.h"
+
+extern int arch_prctl (int, ...);
+
+#define X86_XSTATE_TILECFG_ID 17
+#define X86_XSTATE_TILEDATA_ID 18
+
+/* Initialize tile config. */
+__attribute__ ((noinline, noclone))
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+ int i;
+ tileinfo->palette_id = 1;
+ tileinfo->start_row = 0;
+
+ tileinfo->colsb[0] = MAX_ROWS;
+ tileinfo->rows[0] = MAX_ROWS;
+
+ for (i = 1; i < 4; ++i)
+ {
+ tileinfo->colsb[i] = MAX_COLS;
+ tileinfo->rows[i] = MAX_ROWS;
+ }
+
+ _tile_loadconfig (tileinfo);
+}
+
+static bool
+enable_amx (void)
+{
+ uint64_t bitmask;
+ if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
+ return false;
+
+ if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
+ return false;
+
+ if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
+ return false;
+
+ /* Load tile configuration. */
+ __tilecfg tile_data = { 0 };
+ init_tile_config (&tile_data);
+
+ return true;
+}
+
+/* An architecture can define it to clobber caller-saved registers in
+ malloc below to verify that the implicit TLSDESC call won't change
+ caller-saved registers. */
+static void
+clear_tile_register (void)
+{
+ _tile_zero (2);
+}
+
+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
+#define IS_SUPPORTED() enable_amx ()
+#define PREPARE_MALLOC() clear_tile_register ()
+
+#include <elf/tst-gnu2-tls2.c>
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
new file mode 100644
index 0000000000000000..1845a3caba43a0f1
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
@@ -0,0 +1,63 @@
+/* Test TLSDESC relocation with AMX.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <string.h>
+#include <x86intrin.h>
+#include <support/check.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+
+typedef struct __tile_config
+{
+ uint8_t palette_id;
+ uint8_t start_row;
+ uint8_t reserved_0[14];
+ uint16_t colsb[16];
+ uint8_t rows[16];
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Initialize int8_t buffer */
+static inline void
+init_buffer (int8_t *buf, int8_t value)
+{
+ int rows, colsb, i, j;
+ rows = MAX_ROWS;
+ colsb = MAX_COLS;
+
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < colsb; j++)
+ buf[i * colsb + j] = value;
+}
+
+#define BEFORE_TLSDESC_CALL() \
+ int8_t src[MAX]; \
+ int8_t res[MAX]; \
+ /* Initialize src with data */ \
+ init_buffer (src, 2); \
+ /* Load tile rows from memory. */ \
+ _tile_loadd (2, src, STRIDE);
+
+#define AFTER_TLSDESC_CALL() \
+ /* Store the tile data to memory. */ \
+ _tile_stored (2, res, STRIDE); \
+ _tile_release (); \
+ TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index 6a8fd298137b7f23..21fc88d6510840e6 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -3,3 +3,4 @@
#include <ldsodefs.h>
XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 835113b42f924b83..d71e8d3d2e0e49f9 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
__cpuid_count (0xd, 0, eax, ebx, ecx, edx);
if (ebx != 0)
{
+ /* NB: On AMX capable processors, ebx always includes AMX
+ states. */
unsigned int xsave_state_full_size
= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
@@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
{
unsigned int xstate_comp_offsets[32];
unsigned int xstate_comp_sizes[32];
+#ifdef __x86_64__
+ unsigned int xstate_amx_comp_offsets[32];
+ unsigned int xstate_amx_comp_sizes[32];
+ unsigned int amx_ecx;
+#endif
unsigned int i;
xstate_comp_offsets[0] = 0;
@@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
xstate_comp_offsets[2] = 576;
xstate_comp_sizes[0] = 160;
xstate_comp_sizes[1] = 256;
+#ifdef __x86_64__
+ xstate_amx_comp_offsets[0] = 0;
+ xstate_amx_comp_offsets[1] = 160;
+ xstate_amx_comp_offsets[2] = 576;
+ xstate_amx_comp_sizes[0] = 160;
+ xstate_amx_comp_sizes[1] = 256;
+#endif
for (i = 2; i < 32; i++)
{
- if ((STATE_SAVE_MASK & (1 << i)) != 0)
+ if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
{
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
- xstate_comp_sizes[i] = eax;
+#ifdef __x86_64__
+ /* Include this in xsave_state_full_size. */
+ amx_ecx = ecx;
+ xstate_amx_comp_sizes[i] = eax;
+ if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
+ {
+ /* Exclude this from xsave_state_size. */
+ ecx = 0;
+ xstate_comp_sizes[i] = 0;
+ }
+ else
+#endif
+ xstate_comp_sizes[i] = eax;
}
else
{
+#ifdef __x86_64__
+ amx_ecx = 0;
+ xstate_amx_comp_sizes[i] = 0;
+#endif
ecx = 0;
xstate_comp_sizes[i] = 0;
}
@@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
if ((ecx & (1 << 1)) != 0)
xstate_comp_offsets[i]
= ALIGN_UP (xstate_comp_offsets[i], 64);
+#ifdef __x86_64__
+ xstate_amx_comp_offsets[i]
+ = (xstate_amx_comp_offsets[i - 1]
+ + xstate_amx_comp_sizes[i - 1]);
+ if ((amx_ecx & (1 << 1)) != 0)
+ xstate_amx_comp_offsets[i]
+ = ALIGN_UP (xstate_amx_comp_offsets[i],
+ 64);
+#endif
}
}
@@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
= xstate_comp_offsets[31] + xstate_comp_sizes[31];
if (size)
{
+#ifdef __x86_64__
+ unsigned int amx_size
+ = (xstate_amx_comp_offsets[31]
+ + xstate_amx_comp_sizes[31]);
+ amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
+ 64);
+ /* Set xsave_state_full_size to the compact AMX
+ state size for XSAVEC. NB: xsave_state_full_size
+ is only used in _dl_tlsdesc_dynamic_xsave and
+ _dl_tlsdesc_dynamic_xsavec. */
+ cpu_features->xsave_state_full_size = amx_size;
+#endif
cpu_features->xsave_state_size
= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
CPU_FEATURE_SET (cpu_features, XSAVEC);
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index b9bf3115b616f05f..cd7bd27cf35959fd 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -934,6 +934,8 @@ struct cpu_features
/* The full state size for XSAVE when XSAVEC is disabled by
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
+
+ and the AMX state size when XSAVEC is available.
*/
unsigned int xsave_state_full_size;
/* Data cache size for use in memory and string routines, typically
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 485cad9c0283b334..db8e576e91767db5 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -56,6 +56,14 @@
| (1 << X86_XSTATE_ZMM_H_ID) \
| (1 << X86_XSTATE_ZMM_ID) \
| (1 << X86_XSTATE_APX_F_ID))
+
+/* AMX state mask. */
+# define AMX_STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
+
+/* States to be included in xsave_state_full_size. */
+# define FULL_STATE_SAVE_MASK \
+ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
#else
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
doesn't have red-zone, use 0 here. */
@@ -68,13 +76,17 @@
| (1 << X86_XSTATE_BNDREGS_ID) \
| (1 << X86_XSTATE_K_ID) \
| (1 << X86_XSTATE_ZMM_H_ID))
+
+/* States to be included in xsave_state_size. */
+# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
#endif
/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
- Compiler assumes that all registers, including x87 FPU stack registers,
- are unchanged after CALL, except for EFLAGS and RAX/EAX. */
+ Compiler assumes that all registers, including AMX and x87 FPU
+ stack registers, are unchanged after CALL, except for EFLAGS and
+ RAX/EAX. */
#define TLSDESC_CALL_STATE_SAVE_MASK \
- (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+ (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
/* Constants for bits in __x86_string_control: */
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index 418cc4a9b862f7e0..04a534fa126a7bf7 100755
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -134,6 +134,34 @@ fi
config_vars="$config_vars
enable-cet = $enable_cet"
+# Check if -mamx-tile works properly.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
+printf %s "checking whether -mamx-tile works properly... " >&6; }
+if test ${libc_cv_x86_have_amx_tile+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+ cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+ libc_cv_x86_have_amx_tile=no
+ if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+ libc_cv_x86_have_amx_tile=yes
+ fi
+ fi
+ rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
+config_vars="$config_vars
+have-mamx-tile = $libc_cv_x86_have_amx_tile"
+
test -n "$critic_missing" && as_fn_error $? "
*** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index d1f803c02ee67fc5..c714c47351e70390 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
fi
LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
+# Check if -mamx-tile works properly.
+AC_CACHE_CHECK(whether -mamx-tile works properly,
+ libc_cv_x86_have_amx_tile, [dnl
+cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+ libc_cv_x86_have_amx_tile=no
+ if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+ libc_cv_x86_have_amx_tile=yes
+ fi
+ fi
+ rm -rf conftest*])
+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
+
test -n "$critic_missing" && AC_MSG_ERROR([
*** $critic_missing])
diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
index 0c2e8d5320d0bd26..9f02cfc3eb297ed2 100644
--- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
# endif
#else
/* Allocate stack space of the required size to save the state. */
- sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
#endif
/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
r10 and r11. */

View File

@ -0,0 +1,250 @@
commit 354cabcb2634abe16da7a2ba5e648aac1204b58e
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Mar 18 06:40:16 2024 -0700
x86-64: Allocate state buffer space for RDI, RSI and RBX
_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define
TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
STATE_SAVE_OFFSET(%rsp).
+==================+<- stack frame start aligned at 8 or 16 bytes
| |<- RDI saved in the red zone
| |<- RSI saved in the red zone
| |<- RBX saved in the red zone
| |<- paddings for stack realignment of 64 bytes
|------------------|<- xsave buffer end aligned at 64 bytes
| |<-
| |<-
| |<-
|------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
| |<- 8-byte padding for 64-byte alignment
| |<- 8-byte padding for 64-byte alignment
| |<- R11
| |<- R10
| |<- R9
| |<- R8
| |<- RDX
| |<- RCX
+==================+<- RSP aligned at 64 bytes
Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
and RBX are saved onto stack without adjusting stack pointer first, using
the red-zone. This fixes BZ #31501.
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
(cherry picked from commit 717ebfa85c8240d32d0d19d86a484c31c55c9617)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index d71e8d3d2e0e49f9..6fe1b728c607f39e 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -310,7 +310,7 @@ update_active (struct cpu_features *cpu_features)
/* NB: On AMX capable processors, ebx always includes AMX
states. */
unsigned int xsave_state_full_size
- = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+ = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
cpu_features->xsave_state_size
= xsave_state_full_size;
@@ -400,8 +400,10 @@ update_active (struct cpu_features *cpu_features)
unsigned int amx_size
= (xstate_amx_comp_offsets[31]
+ xstate_amx_comp_sizes[31]);
- amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
- 64);
+ amx_size
+ = ALIGN_UP ((amx_size
+ + TLSDESC_CALL_REGISTER_SAVE_AREA),
+ 64);
/* Set xsave_state_full_size to the compact AMX
state size for XSAVEC. NB: xsave_state_full_size
is only used in _dl_tlsdesc_dynamic_xsave and
@@ -409,7 +411,8 @@ update_active (struct cpu_features *cpu_features)
cpu_features->xsave_state_full_size = amx_size;
#endif
cpu_features->xsave_state_size
- = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+ = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+ 64);
CPU_FEATURE_SET (cpu_features, XSAVEC);
}
}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91767db5..7359149e17ccf341 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -38,14 +38,59 @@
#ifdef __x86_64__
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
- aligned to 16 bytes for fxsave and 64 bytes for xsave.
-
- NB: Is is non-zero because of the 128-byte red-zone. Some registers
- are saved on stack without adjusting stack pointer first. When we
- update stack pointer to allocate more space, we need to take the
- red-zone into account. */
+ aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero
+ because MOV, instead of PUSH, is used to save registers onto stack.
+
+ +==================+<- stack frame start aligned at 8 or 16 bytes
+ | |<- paddings for stack realignment of 64 bytes
+ |------------------|<- xsave buffer end aligned at 64 bytes
+ | |<-
+ | |<-
+ | |<-
+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+ | |<- 8-byte padding for 64-byte alignment
+ | |<- R9
+ | |<- R8
+ | |<- RDI
+ | |<- RSI
+ | |<- RDX
+ | |<- RCX
+ | |<- RAX
+ +==================+<- RSP aligned at 64 bytes
+
+ */
# define STATE_SAVE_OFFSET (8 * 7 + 8)
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+ stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+ R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved
+ RDI, RSI and RBX values on stack by xsave.
+
+ +==================+<- stack frame start aligned at 8 or 16 bytes
+ | |<- RDI saved in the red zone
+ | |<- RSI saved in the red zone
+ | |<- RBX saved in the red zone
+ | |<- paddings for stack realignment of 64 bytes
+ |------------------|<- xsave buffer end aligned at 64 bytes
+ | |<-
+ | |<-
+ | |<-
+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+ | |<- 8-byte padding for 64-byte alignment
+ | |<- 8-byte padding for 64-byte alignment
+ | |<- R11
+ | |<- R10
+ | |<- R9
+ | |<- R8
+ | |<- RDX
+ | |<- RCX
+ +==================+<- RSP aligned at 64 bytes
+
+ Define the total register save area size for all integer registers by
+ adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
+ stack without adjusting stack pointer first, using the red-zone. */
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
+
/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
registers are mutually exclusive. */
# define STATE_SAVE_MASK \
@@ -66,8 +111,9 @@
(STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
#else
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
- doesn't have red-zone, use 0 here. */
+ uses PUSH to save registers onto stack, use 0 here. */
# define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
/* Save SSE, AVX, AXV512, mask and bound registers. */
# define STATE_SAVE_MASK \
diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
new file mode 100644
index 0000000000000000..1d636669ba255724
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
@@ -0,0 +1,87 @@
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
+ to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size
+ is 960 bytes and this test didn't fail. It may be due to the unused
+ last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and
+ this test might fail without the fix. */
+#ifndef OFFSET
+# define OFFSET 40
+#endif
+
+ .text
+ .p2align 4
+ .globl apply_tls
+ .type apply_tls, @function
+apply_tls:
+ cfi_startproc
+ _CET_ENDBR
+ pushq %rbp
+ cfi_def_cfa_offset (16)
+ cfi_offset (6, -16)
+ movdqu (%RDI_LP), %xmm0
+ lea tls_var1@TLSDESC(%rip), %RAX_LP
+ mov %RSP_LP, %RBP_LP
+ cfi_def_cfa_register (6)
+ /* Align stack to 64 bytes. */
+ and $-64, %RSP_LP
+ sub $OFFSET, %RSP_LP
+ pushq %rbx
+ /* Set %ebx to 0xbadbeef. */
+ movl $0xbadbeef, %ebx
+ movl $0xbadbeef, %esi
+ movq %rdi, saved_rdi(%rip)
+ movq %rsi, saved_rsi(%rip)
+ call *tls_var1@TLSCALL(%RAX_LP)
+ /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */
+ cmpq saved_rdi(%rip), %rdi
+ jne L(hlt)
+ cmpq saved_rsi(%rip), %rsi
+ jne L(hlt)
+ cmpl $0xbadbeef, %ebx
+ jne L(hlt)
+ add %fs:0, %RAX_LP
+ movups %xmm0, 32(%RAX_LP)
+ movdqu 16(%RDI_LP), %xmm1
+ mov %RAX_LP, %RBX_LP
+ movups %xmm1, 48(%RAX_LP)
+ lea 32(%RBX_LP), %RAX_LP
+ pop %rbx
+ leave
+ cfi_def_cfa (7, 8)
+ ret
+L(hlt):
+ hlt
+ cfi_endproc
+ .size apply_tls, .-apply_tls
+ .hidden tls_var1
+ .globl tls_var1
+ .section .tbss,"awT",@nobits
+ .align 16
+ .type tls_var1, @object
+ .size tls_var1, 3200
+tls_var1:
+ .zero 3200
+ .local saved_rdi
+ .comm saved_rdi,8,8
+ .local saved_rsi
+ .comm saved_rsi,8,8
+ .section .note.GNU-stack,"",@progbits

View File

@ -0,0 +1,38 @@
commit 15aebdbada54098787715448c94701f17033fc92
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue Mar 12 13:21:18 2024 -0300
Ignore undefined symbols for -mtls-dialect=gnu2
So it does not fail for arm config that defaults to -mtp=soft (which
issues a call to __aeabi_read_tp).
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 968b0ca9440040a2b31248a572891f0e55c1ab10)
diff --git a/configure b/configure
index 59ff1e415dda4fbf..117b48a421792eda 100755
--- a/configure
+++ b/configure
@@ -7020,7 +7020,7 @@ void foo (void)
}
EOF
if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
- conftest.c -o conftest 1>&5'
+ -shared conftest.c -o conftest 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
ac_status=$?
diff --git a/configure.ac b/configure.ac
index 65799e56852a5356..19b88a47a52508a1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1297,7 +1297,7 @@ void foo (void)
}
EOF
if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
- conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
+ -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
then
libc_cv_mtls_dialect_gnu2=yes
else

View File

@ -0,0 +1,446 @@
commit a8ba52bde58c69f2b31da62ad2311f119adf6cb9
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue Mar 12 13:21:19 2024 -0300
arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
ARM _dl_tlsdesc_dynamic slow path has two issues:
* The ip/r12 is defined by AAPCS as a scratch register, and gcc is
used to save the stack pointer before on some function calls. So it
should also be saved/restored as well. It fixes the tst-gnu2-tls2.
* None of the possible VFP registers are saved/restored. ARM has the
additional complexity to have different VFP bank sizes (depending of
VFP support by the chip).
The tst-gnu2-tls2 test is extended to check for VFP registers, although
only for hardfp builds. Different than setcontext, _dl_tlsdesc_dynamic
does not have HWCAP_ARM_IWMMXT (I don't have a way to properly test
it and it is almost a decade since newer hardware was released).
With this patch there is no need to mark tst-gnu2-tls2 as XFAIL.
Checked on arm-linux-gnueabihf.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 64c7e344289ed085517c2227d8e3b06388242c13)
diff --git a/config.h.in b/config.h.in
index 44a34072a47aa008..4d33c63a841d3d6d 100644
--- a/config.h.in
+++ b/config.h.in
@@ -141,6 +141,9 @@
/* LOONGARCH floating-point ABI for ld.so. */
#undef LOONGARCH_ABI_FRLEN
+/* Define whether ARM used hard-float and support VFPvX-D32. */
+#undef HAVE_ARM_PCS_VFP_D32
+
/* Linux specific: minimum supported kernel version. */
#undef __LINUX_KERNEL_VERSION
diff --git a/elf/Makefile b/elf/Makefile
index c5c37a9147e69d83..030db4d207d3491e 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -3056,10 +3056,6 @@ $(objpfx)tst-gnu2-tls2.out: \
$(objpfx)tst-gnu2-tls2mod2.so
ifeq (yes,$(have-mtls-dialect-gnu2))
-# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
-# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
-test-xfail-tst-gnu2-tls2 = yes
-
CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
index 77964a57a352e6a4..1ade8151e200af68 100644
--- a/elf/tst-gnu2-tls2.h
+++ b/elf/tst-gnu2-tls2.h
@@ -27,6 +27,10 @@ extern struct tls *apply_tls (struct tls *);
/* An architecture can define them to verify that clobber caller-saved
registers aren't changed by the implicit TLSDESC call. */
+#ifndef INIT_TLSDESC_CALL
+# define INIT_TLSDESC_CALL()
+#endif
+
#ifndef BEFORE_TLSDESC_CALL
# define BEFORE_TLSDESC_CALL()
#endif
diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
index 45556a0e173922cc..3fe3c142777abe04 100644
--- a/elf/tst-gnu2-tls2mod0.c
+++ b/elf/tst-gnu2-tls2mod0.c
@@ -16,13 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "tst-gnu2-tls2.h"
+#include <tst-gnu2-tls2.h>
__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
struct tls *
apply_tls (struct tls *p)
{
+ INIT_TLSDESC_CALL ();
BEFORE_TLSDESC_CALL ();
tls_var0 = *p;
struct tls *ret = &tls_var0;
diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
index e10b9dbc0a7573c7..e2105384689e2d2e 100644
--- a/elf/tst-gnu2-tls2mod1.c
+++ b/elf/tst-gnu2-tls2mod1.c
@@ -16,13 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "tst-gnu2-tls2.h"
+#include <tst-gnu2-tls2.h>
__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
struct tls *
apply_tls (struct tls *p)
{
+ INIT_TLSDESC_CALL ();
BEFORE_TLSDESC_CALL ();
tls_var1[1] = *p;
struct tls *ret = &tls_var1[1];
diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
index 141af51e55b8bf34..6d3031dc5fbc1041 100644
--- a/elf/tst-gnu2-tls2mod2.c
+++ b/elf/tst-gnu2-tls2mod2.c
@@ -16,13 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "tst-gnu2-tls2.h"
+#include <tst-gnu2-tls2.h>
__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
struct tls *
apply_tls (struct tls *p)
{
+ INIT_TLSDESC_CALL ();
BEFORE_TLSDESC_CALL ();
tls_var2 = *p;
struct tls *ret = &tls_var2;
diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
index 35e2918922300956..4ef4d46cbd5384e9 100644
--- a/sysdeps/arm/configure
+++ b/sysdeps/arm/configure
@@ -187,6 +187,38 @@ else
default-abi = soft"
fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5
+printf %s "checking whether VFP supports 32 registers... " >&6; }
+if test ${libc_cv_arm_pcs_vfp_d32+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+void foo (void)
+{
+ asm volatile ("vldr d16,=17" : : : "d16");
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+ libc_cv_arm_pcs_vfp_d32=yes
+else $as_nop
+ libc_cv_arm_pcs_vfp_d32=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5
+printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; }
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
+then
+ printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h
+
+fi
+
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
if test ${libc_cv_arm_pcrel_movw+y}
diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
index 5172e30bbe79f995..cd00ddc9d9ade5d7 100644
--- a/sysdeps/arm/configure.ac
+++ b/sysdeps/arm/configure.ac
@@ -21,6 +21,21 @@ else
LIBC_CONFIG_VAR([default-abi], [soft])
fi
+AC_CACHE_CHECK([whether VFP supports 32 registers],
+ libc_cv_arm_pcs_vfp_d32, [
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+void foo (void)
+{
+ asm volatile ("vldr d16,=17" : : : "d16");
+}
+]])],
+ [libc_cv_arm_pcs_vfp_d32=yes],
+ [libc_cv_arm_pcs_vfp_d32=no])])
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
+then
+ AC_DEFINE(HAVE_ARM_PCS_VFP_D32)
+fi
+
AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
libc_cv_arm_pcrel_movw, [
cat > conftest.s <<\EOF
diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
index 764c56e70f046b03..ada106521da8971a 100644
--- a/sysdeps/arm/dl-tlsdesc.S
+++ b/sysdeps/arm/dl-tlsdesc.S
@@ -19,6 +19,7 @@
#include <sysdep.h>
#include <arm-features.h>
#include <tls.h>
+#include <rtld-global-offsets.h>
#include "tlsdesc.h"
.text
@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp)
.align 2
_dl_tlsdesc_dynamic:
/* Our calling convention is to clobber r0, r1 and the processor
- flags. All others that are modified must be saved */
- eabi_save ({r2,r3,r4,lr})
- push {r2,r3,r4,lr}
- cfi_adjust_cfa_offset (16)
+ flags. All others that are modified must be saved. r5 is
+ used as the hwcap value to avoid reload after __tls_get_addr
+ call. If required we will save the vector register on the slow
+ path. */
+ eabi_save ({r2,r3,r4,r5,ip,lr})
+ push {r2,r3,r4,r5,ip,lr}
+ cfi_adjust_cfa_offset (24)
cfi_rel_offset (r2,0)
cfi_rel_offset (r3,4)
cfi_rel_offset (r4,8)
- cfi_rel_offset (lr,12)
+ cfi_rel_offset (r5,12)
+ cfi_rel_offset (ip,16)
+ cfi_rel_offset (lr,20)
+
ldr r1, [r0] /* td */
GET_TLS (lr)
mov r4, r0 /* r4 = tp */
@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic:
rsbne r0, r4, r3
bne 2f
1: mov r0, r1
+
+ /* Load the hwcap to check for vector support. */
+ ldr r2, 3f
+ ldr r1, .Lrtld_global_ro
+0: add r2, pc, r2
+ ldr r2, [r2, r1]
+ ldr r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+
+#ifdef __SOFTFP__
+ tst r5, #HWCAP_ARM_VFP
+ beq .Lno_vfp
+#endif
+
+ /* Store the VFP registers. Don't use VFP instructions directly
+ because this code is used in non-VFP multilibs. */
+#define VFP_STACK_REQ (32*8 + 8)
+ sub sp, sp, VFP_STACK_REQ
+ cfi_adjust_cfa_offset (VFP_STACK_REQ)
+ mov r3, sp
+ .inst 0xeca30b20 /* vstmia r3!, {d0-d15} */
+ tst r5, #HWCAP_ARM_VFPD32
+ beq 4f
+ .inst 0xece30b20 /* vstmia r3!, {d16-d31} */
+ /* Store the floating-point status register. */
+4: .inst 0xeef12a10 /* vmrs r2, fpscr */
+ str r2, [r3]
+.Lno_vfp:
bl __tls_get_addr
rsb r0, r4, r0
+#ifdef __SOFTFP__
+ tst r5, #HWCAP_ARM_VFP
+ beq 2f
+#endif
+ mov r3, sp
+ .inst 0xecb30b20 /* vldmia r3!, {d0-d15} */
+ tst r5, #HWCAP_ARM_VFPD32
+ beq 5f
+ .inst 0xecf30b20 /* vldmia r3!, {d16-d31} */
+ ldr r4, [r3]
+5: .inst 0xeee14a10 /* vmsr fpscr, r4 */
+ add sp, sp, VFP_STACK_REQ
+ cfi_adjust_cfa_offset (-VFP_STACK_REQ)
+
2:
#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
|| defined (ARM_ALWAYS_BX))
- pop {r2,r3,r4, lr}
- cfi_adjust_cfa_offset (-16)
+ pop {r2,r3,r4,r5,ip, lr}
+ cfi_adjust_cfa_offset (-20)
cfi_restore (lr)
+ cfi_restore (ip)
+ cfi_restore (r5)
cfi_restore (r4)
cfi_restore (r3)
cfi_restore (r2)
bx lr
#else
- pop {r2,r3,r4, pc}
+ pop {r2,r3,r4,r5,ip, pc}
#endif
eabi_fnend
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+
+3: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
+.Lrtld_global_ro:
+ .long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
#endif /* SHARED */
diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h
new file mode 100644
index 0000000000000000..e413ac21fb9ed9bf
--- /dev/null
+++ b/sysdeps/arm/tst-gnu2-tls2.h
@@ -0,0 +1,128 @@
+/* Test TLSDESC relocation. ARM version.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sys/auxv.h>
+#include <string.h>
+#include <stdlib.h>
+#include <endian.h>
+
+#ifndef __SOFTFP__
+
+# ifdef HAVE_ARM_PCS_VFP_D32
+# define SAVE_VFP_D32 \
+ asm volatile ("vldr d16,=17" : : : "d16"); \
+ asm volatile ("vldr d17,=18" : : : "d17"); \
+ asm volatile ("vldr d18,=19" : : : "d18"); \
+ asm volatile ("vldr d19,=20" : : : "d19"); \
+ asm volatile ("vldr d20,=21" : : : "d20"); \
+ asm volatile ("vldr d21,=22" : : : "d21"); \
+ asm volatile ("vldr d22,=23" : : : "d22"); \
+ asm volatile ("vldr d23,=24" : : : "d23"); \
+ asm volatile ("vldr d24,=25" : : : "d24"); \
+ asm volatile ("vldr d25,=26" : : : "d25"); \
+ asm volatile ("vldr d26,=27" : : : "d26"); \
+ asm volatile ("vldr d27,=28" : : : "d27"); \
+ asm volatile ("vldr d28,=29" : : : "d28"); \
+ asm volatile ("vldr d29,=30" : : : "d29"); \
+ asm volatile ("vldr d30,=31" : : : "d30"); \
+ asm volatile ("vldr d31,=32" : : : "d31");
+# else
+# define SAVE_VFP_D32
+# endif
+
+# define INIT_TLSDESC_CALL() \
+ unsigned long hwcap = getauxval (AT_HWCAP)
+
+/* Set each vector register to a value from 1 to 32 before the TLS access,
+ dump to memory after TLS access, and compare with the expected values. */
+
+# define BEFORE_TLSDESC_CALL() \
+ if (hwcap & HWCAP_ARM_VFP) \
+ { \
+ asm volatile ("vldr d0,=1" : : : "d0"); \
+ asm volatile ("vldr d1,=2" : : : "d1"); \
+ asm volatile ("vldr d2,=3" : : : "d1"); \
+ asm volatile ("vldr d3,=4" : : : "d3"); \
+ asm volatile ("vldr d4,=5" : : : "d4"); \
+ asm volatile ("vldr d5,=6" : : : "d5"); \
+ asm volatile ("vldr d6,=7" : : : "d6"); \
+ asm volatile ("vldr d7,=8" : : : "d7"); \
+ asm volatile ("vldr d8,=9" : : : "d8"); \
+ asm volatile ("vldr d9,=10" : : : "d9"); \
+ asm volatile ("vldr d10,=11" : : : "d10"); \
+ asm volatile ("vldr d11,=12" : : : "d11"); \
+ asm volatile ("vldr d12,=13" : : : "d12"); \
+ asm volatile ("vldr d13,=14" : : : "d13"); \
+ asm volatile ("vldr d14,=15" : : : "d14"); \
+ asm volatile ("vldr d15,=16" : : : "d15"); \
+ } \
+ if (hwcap & HWCAP_ARM_VFPD32) \
+ { \
+ SAVE_VFP_D32 \
+ }
+
+# define VFP_STACK_REQ (16*8)
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define DISP 7
+# else
+# define DISP 0
+# endif
+
+# ifdef HAVE_ARM_PCS_VFP_D32
+# define CHECK_VFP_D32 \
+ char vfp[VFP_STACK_REQ]; \
+ asm volatile ("vstmia %0, {d16-d31}\n" \
+ : \
+ : "r" (vfp) \
+ : "memory"); \
+ \
+ char expected[VFP_STACK_REQ] = { 0 }; \
+ for (int i = 0; i < 16; ++i) \
+ expected[i * 8 + DISP] = i + 17; \
+ \
+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
+ abort ();
+# else
+# define CHECK_VFP_D32
+# endif
+
+# define AFTER_TLSDESC_CALL() \
+ if (hwcap & HWCAP_ARM_VFP) \
+ { \
+ char vfp[VFP_STACK_REQ]; \
+ asm volatile ("vstmia %0, {d0-d15}\n" \
+ : \
+ : "r" (vfp) \
+ : "memory"); \
+ \
+ char expected[VFP_STACK_REQ] = { 0 }; \
+ for (int i = 0; i < 16; ++i) \
+ expected[i * 8 + DISP] = i + 1; \
+ \
+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
+ abort (); \
+ } \
+ if (hwcap & HWCAP_ARM_VFPD32) \
+ { \
+ CHECK_VFP_D32 \
+ }
+
+#endif /* __SOFTFP__ */
+
+#include_next <tst-gnu2-tls2.h>

View File

@ -0,0 +1,221 @@
commit aded2fc004e7ee85cf0b45b1382552d41e555a23
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue Mar 12 13:21:20 2024 -0300
elf: Enable TLS descriptor tests on aarch64
The aarch64 uses 'trad' for traditional tls and 'desc' for tls
descriptors, but unlike other targets it defaults to 'desc'. The
gnutls2 configure check does not set aarch64 as an ABI that uses
TLS descriptors, which then disable somes stests.
Also rename the internal machinery fron gnu2 to tls descriptors.
Checked on aarch64-linux-gnu.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 3d53d18fc71c5d9ef4773b8bce04d54b80181926)
diff --git a/configure b/configure
index 117b48a421792eda..432e40a59295cffd 100755
--- a/configure
+++ b/configure
@@ -653,7 +653,7 @@ LIBGD
libc_cv_cc_loop_to_function
libc_cv_cc_submachine
libc_cv_cc_nofma
-libc_cv_mtls_dialect_gnu2
+libc_cv_mtls_descriptor
libc_cv_has_glob_dat
libc_cv_fpie
libc_cv_z_execstack
@@ -4760,6 +4760,9 @@ libc_config_ok=no
# whether to use such directories.
with_fp_cond=1
+# A preconfigure script may define another name to TLS descriptor variant
+mtls_descriptor=gnu2
+
if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
@@ -7006,9 +7009,9 @@ fi
printf "%s\n" "$libc_cv_has_glob_dat" >&6; }
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5
-printf %s "checking for -mtls-dialect=gnu2... " >&6; }
-if test ${libc_cv_mtls_dialect_gnu2+y}
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5
+printf %s "checking for tls descriptor support... " >&6; }
+if test ${libc_cv_mtls_descriptor+y}
then :
printf %s "(cached) " >&6
else $as_nop
@@ -7019,7 +7022,7 @@ void foo (void)
i = 10;
}
EOF
-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
-shared conftest.c -o conftest 1>&5'
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
(eval $ac_try) 2>&5
@@ -7027,17 +7030,17 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nost
printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }
then
- libc_cv_mtls_dialect_gnu2=yes
+ libc_cv_mtls_descriptor=$mtls_descriptor
else
- libc_cv_mtls_dialect_gnu2=no
+ libc_cv_mtls_descriptor=no
fi
rm -f conftest*
fi
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5
-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5
+printf "%s\n" "$libc_cv_mtls_descriptor" >&6; }
config_vars="$config_vars
-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2"
+have-mtls-descriptor = $libc_cv_mtls_descriptor"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5
printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; }
diff --git a/configure.ac b/configure.ac
index 19b88a47a52508a1..bdc385d03c3dc7f5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -442,6 +442,9 @@ libc_config_ok=no
# whether to use such directories.
with_fp_cond=1
+# A preconfigure script may define another name to TLS descriptor variant
+mtls_descriptor=gnu2
+
dnl Let sysdeps/*/preconfigure act here.
LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
@@ -1287,7 +1290,7 @@ fi
rm -f conftest*])
AC_SUBST(libc_cv_has_glob_dat)
-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2,
+AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor,
[dnl
cat > conftest.c <<EOF
__thread int i;
@@ -1296,16 +1299,16 @@ void foo (void)
i = 10;
}
EOF
-if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
-shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
then
- libc_cv_mtls_dialect_gnu2=yes
+ libc_cv_mtls_descriptor=$mtls_descriptor
else
- libc_cv_mtls_dialect_gnu2=no
+ libc_cv_mtls_descriptor=no
fi
rm -f conftest*])
-AC_SUBST(libc_cv_mtls_dialect_gnu2)
-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2])
+AC_SUBST(libc_cv_mtls_descriptor)
+LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor])
dnl clang emits an warning for a double alias redirection, to warn the
dnl original symbol is sed even when weak definition overrides it.
diff --git a/elf/Makefile b/elf/Makefile
index 030db4d207d3491e..69aa423c4b90127d 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -999,13 +999,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\
# For +depfiles in Makerules.
extra-test-objs += tst-auditmod17.os
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
tests += tst-gnu2-tls1
modules-names += tst-gnu2-tls1mod
$(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
tst-gnu2-tls1mod.so-no-z-defs = yes
-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
-endif # $(have-mtls-dialect-gnu2)
+CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor)
+endif # $(have-mtls-descriptor)
ifeq (yes,$(have-protected-data))
modules-names += tst-protected1moda tst-protected1modb
@@ -2972,11 +2972,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \
$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
$(objpfx)tst-audit-tlsdesc-mod2.so \
$(shared-thread-library)
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
# The test is valid for all TLS types, but we want to exercise GNU2
# TLS if possible.
-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
+CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
endif
$(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
@@ -3055,11 +3055,11 @@ $(objpfx)tst-gnu2-tls2.out: \
$(objpfx)tst-gnu2-tls2mod1.so \
$(objpfx)tst-gnu2-tls2mod2.so
-ifeq (yes,$(have-mtls-dialect-gnu2))
-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
-CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
+ifneq (no,$(have-mtls-descriptor))
+CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
endif
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
index d9bd1f8558a079cb..19657b627bc84c4e 100644
--- a/sysdeps/aarch64/preconfigure
+++ b/sysdeps/aarch64/preconfigure
@@ -2,5 +2,6 @@ case "$machine" in
aarch64*)
base_machine=aarch64
machine=aarch64
+ mtls_descriptor=desc
;;
esac
diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile
index d5cea717a9c201aa..619474eca94fe8e4 100644
--- a/sysdeps/arm/Makefile
+++ b/sysdeps/arm/Makefile
@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os
lib-noranlib: $(objpfx)libgcc-stubs.a
ifeq ($(build-shared),yes)
-ifeq (yes,$(have-mtls-dialect-gnu2))
+ifneq (no,$(have-mtls-descriptor))
tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy
modules-names += tst-armtlsdesclocmod
modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod
CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic=
CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic=
-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2
-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2
-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2
+CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor)
+CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor)
LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now
tst-armtlsdescloc-ENV = LD_BIND_NOW=1
tst-armtlsdescextnow-ENV = LD_BIND_NOW=1

View File

@ -0,0 +1,24 @@
commit 5a461f2949ded98d8211939f84988bc464c7b4fe
Author: Andreas Schwab <schwab@suse.de>
Date: Tue Mar 19 13:49:50 2024 +0100
Add tst-gnu2-tls2mod1 to test-internal-extras
That allows sysdeps/x86_64/tst-gnu2-tls2mod1.S to use internal headers.
Fixes: 717ebfa85c ("x86-64: Allocate state buffer space for RDI, RSI and RBX")
(cherry picked from commit fd7ee2e6c5eb49e4a630a9978b4d668bff6354ee)
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index e8babc9a4edbf90b..9d374a329916fc45 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
$(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
endif
+test-internal-extras += tst-gnu2-tls2mod1
+
endif # $(subdir) == elf
ifeq ($(subdir),csu)

View File

@ -0,0 +1,146 @@
commit aa4249266e9906c4bc833e4847f4d8feef59504f
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu Feb 8 10:08:38 2024 -0300
x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
The REP MOVSB usage on memcpy/memmove does not show much performance
improvement on Zen3/Zen4 cores compared to the vectorized loops. Also,
as from BZ 30994, if the source is aligned and the destination is not
the performance can be 20x slower.
The performance difference is noticeable with small buffer sizes, closer
to the lower bounds limits when memcpy/memmove starts to use ERMS. The
performance of REP MOVSB is similar to vectorized instruction on the
size limit (the L2 cache). Also, there is no drawback to multiple cores
sharing the cache.
Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index d5101615e348e5c2..f34d12846caf9422 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
long int data = -1;
long int shared = -1;
long int shared_per_thread = -1;
- long int core = -1;
unsigned int threads = 0;
unsigned long int level1_icache_size = -1;
unsigned long int level1_icache_linesize = -1;
@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (cpu_features->basic.kind == arch_kind_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
- core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
shared_per_thread = shared;
@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
= handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
level1_dcache_linesize
= handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
- level2_cache_size = core;
+ level2_cache_size
+ = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
level2_cache_assoc
= handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
level2_cache_linesize
@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level4_cache_size
= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
+ level2_cache_size);
}
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
{
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
shared_per_thread = shared;
@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level1_dcache_size = data;
level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
- level2_cache_size = core;
+ level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
level3_cache_size = shared;
level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
+ level2_cache_size);
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level1_dcache_size = data;
level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
- level2_cache_size = core;
+ level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);;
level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
level3_cache_size = shared;
@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (shared <= 0)
{
/* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
+ shared = level2_cache_size;
}
else if (cpu_features->basic.family < 0x17)
{
/* Account for exclusive L2 and L3 caches. */
- shared += core;
+ shared += level2_cache_size;
}
shared_per_thread = shared;
@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
rep_movsb_threshold = 2112;
+ /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
+ cases slower than the vectorized path (and for some alignments,
+ it is really slow, check BZ #30994). */
+ if (cpu_features->basic.kind == arch_kind_amd)
+ rep_movsb_threshold = non_temporal_threshold;
+
/* The default threshold to use Enhanced REP STOSB. */
unsigned long int rep_stosb_threshold = 2048;
@@ -1028,16 +1033,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
SIZE_MAX);
unsigned long int rep_movsb_stop_threshold;
- /* ERMS feature is implemented from AMD Zen3 architecture and it is
- performing poorly for data above L2 cache size. Henceforth, adding
- an upper bound threshold parameter to limit the usage of Enhanced
- REP MOVSB operations and setting its value to L2 cache size. */
- if (cpu_features->basic.kind == arch_kind_amd)
- rep_movsb_stop_threshold = core;
/* Setting the upper bound of ERMS to the computed value of
- non-temporal threshold for architectures other than AMD. */
- else
- rep_movsb_stop_threshold = non_temporal_threshold;
+ non-temporal threshold for all architectures. */
+ rep_movsb_stop_threshold = non_temporal_threshold;
cpu_features->data_cache_size = data;
cpu_features->shared_cache_size = shared;

View File

@ -0,0 +1,30 @@
commit 6484a92698039c4a7a510f0214e22d067b0d78b3
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu Feb 8 10:08:39 2024 -0300
x86: Do not prefer ERMS for memset on Zen3+
For AMD Zen3+ architecture, the performance of the vectorized loop is
slightly better than ERMS.
Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index f34d12846caf9422..5a98f70364220da4 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
minimum value is fixed. */
rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
long int, NULL);
+ if (cpu_features->basic.kind == arch_kind_amd
+ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
+ /* For AMD Zen3+ architecture, the performance of the vectorized loop is
+ slightly better than ERMS. */
+ rep_stosb_threshold = SIZE_MAX;
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);

View File

@ -0,0 +1,24 @@
commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu Feb 8 10:08:40 2024 -0300
x86: Expand the comment on when REP STOSB is used on memset
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 491e55beab7457ed310a4a47496f4a333c5d1032)
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9984c3ca0fafab6a..97839a22483b0613 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -21,7 +21,9 @@
2. If size is less than VEC, use integer register stores.
3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
- 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+ 5. On machines ERMS feature, if size is greater or equal than
+ __x86_rep_stosb_threshold then REP STOSB will be used.
+ 6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
4 VEC stores and store 4 * VEC at a time until done. */
#include <sysdep.h>

284
glibc-upstream-2.39-8.patch Normal file
View File

@ -0,0 +1,284 @@
commit ee7f4c54e19738c2c27d3846e1e9b3595c89221f
Author: Manjunath Matti <mmatti@linux.ibm.com>
Date: Tue Mar 19 15:29:48 2024 -0500
powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
This patch adds a new feature for powerpc. In order to get faster
access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for
implementing __builtin_cpu_supports() in GCC) without the overhead of
reading them from the auxiliary vector, we now reserve space for them
in the TCB.
Suggested-by: Peter Bergner <bergner@linux.ibm.com>
Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
(cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a)
diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
index 7345ebc4e586883f..aaf67b87e81b04c8 100644
--- a/elf/dl-diagnostics.c
+++ b/elf/dl-diagnostics.c
@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ)
_dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
_dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
_dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
+ _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3));
+ _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4));
_dl_diagnostics_print_labeled_string
("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
_dl_diagnostics_print_labeled_value
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 2f502c8b0d27b784..451932dd03e971b8 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr;
size_t _dl_phnum;
uint64_t _dl_hwcap;
uint64_t _dl_hwcap2;
+uint64_t _dl_hwcap3;
+uint64_t _dl_hwcap4;
enum dso_sort_algorithm _dl_dso_sort_algo;
diff --git a/elf/elf.h b/elf/elf.h
index 455731663c6ed339..1c394c64cd5c66ed 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -1234,6 +1234,10 @@ typedef struct
#define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */
#define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */
+/* More machine-dependent hints about processor capabilities. */
+#define AT_HWCAP3 29 /* extension of AT_HWCAP. */
+#define AT_HWCAP4 30 /* extension of AT_HWCAP. */
+
#define AT_EXECFN 31 /* Filename of executable. */
/* Pointer to the global system page used for system calls and other
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 117c901ccc5c5f0b..50f58a60e3f02330 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -646,6 +646,8 @@ struct rtld_global_ro
/* Mask for more hardware capabilities that are available on some
platforms. */
EXTERN uint64_t _dl_hwcap2;
+ EXTERN uint64_t _dl_hwcap3;
+ EXTERN uint64_t _dl_hwcap4;
EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index a76bb6e5b0895e3f..8cf00aa7e359bb6a 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -38,6 +38,10 @@
needed.
*/
+/* The total number of available bits (including those prior to
+ _DL_HWCAP_FIRST). Some of these bits might not be used. */
+#define _DL_HWCAP_COUNT 128
+
#ifndef PROCINFO_CLASS
# define PROCINFO_CLASS
#endif
@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
#if !defined PROCINFO_DECL && defined SHARED
._dl_powerpc_cap_flags
#else
-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
+PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15]
#endif
#ifndef PROCINFO_DECL
= {
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
index 68f424109501aaef..f8cb343877386402 100644
--- a/sysdeps/powerpc/dl-procinfo.h
+++ b/sysdeps/powerpc/dl-procinfo.h
@@ -22,16 +22,17 @@
#include <ldsodefs.h>
#include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */
-/* The total number of available bits (including those prior to
- _DL_HWCAP_FIRST). Some of these bits might not be used. */
-#define _DL_HWCAP_COUNT 64
+/* Feature masks are all 32-bits in size. */
+#define _DL_HWCAP_SIZE 32
-/* Features started at bit 31 and decremented as new features were added. */
-#define _DL_HWCAP_LAST 31
+/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */
+#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE
-/* AT_HWCAP2 features started at bit 31 and decremented as new features were
- added. HWCAP2 feature bits start at bit 0. */
-#define _DL_HWCAP2_LAST 31
+/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */
+#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE)
+
+/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. */
+#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE)
/* These bits influence library search. */
#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
@@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word)
case AT_HWCAP:
_dl_printf ("AT_HWCAP: ");
- for (int i = 0; i <= _DL_HWCAP_LAST; ++i)
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
if (word & (1 << i))
_dl_printf (" %s", _dl_hwcap_string (i));
break;
case AT_HWCAP2:
{
- unsigned int offset = _DL_HWCAP_LAST + 1;
_dl_printf ("AT_HWCAP2: ");
- /* We have to go through them all because the kernel added the
- AT_HWCAP2 features starting with the high bits. */
- for (int i = 0; i <= _DL_HWCAP2_LAST; ++i)
- if (word & (1 << i))
- _dl_printf (" %s", _dl_hwcap_string (offset + i));
+ /* We have to go through them all because the kernel added the
+ AT_HWCAP2 features starting with the high bits. */
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
+ if (word & (1 << i))
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i));
+ break;
+ }
+ case AT_HWCAP3:
+ {
+ _dl_printf ("AT_HWCAP3: ");
+
+ /* We have to go through them all because the kernel added the
+ AT_HWCAP3 features starting with the high bits. */
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
+ if (word & (1 << i))
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i));
+ break;
+ }
+ case AT_HWCAP4:
+ {
+ _dl_printf ("AT_HWCAP4: ");
+
+ /* We have to go through them all because the kernel added the
+ AT_HWCAP4 features starting with the high bits. */
+ for (int i = 0; i <= _DL_HWCAP_SIZE; ++i)
+ if (word & (1 << i))
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i));
break;
}
case AT_L1I_CACHEGEOMETRY:
diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c
index 76344f285a903858..f6fede15a7dfbf6c 100644
--- a/sysdeps/powerpc/hwcapinfo.c
+++ b/sysdeps/powerpc/hwcapinfo.c
@@ -31,7 +31,7 @@ void
__tcb_parse_hwcap_and_convert_at_platform (void)
{
- uint64_t h1, h2;
+ uint64_t h1, h2, h3, h4;
/* Read AT_PLATFORM string from auxv and convert it to a number. */
__tcb.at_platform = _dl_string_platform (GLRO (dl_platform));
@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
/* Read HWCAP and HWCAP2 from auxv. */
h1 = GLRO (dl_hwcap);
h2 = GLRO (dl_hwcap2);
+ h3 = GLRO (dl_hwcap3);
+ h4 = GLRO (dl_hwcap4);
/* hwcap contains only the latest supported ISA, the code checks which is
and fills the previous supported ones. */
@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
else if (h1 & PPC_FEATURE_POWER5)
h1 |= PPC_FEATURE_POWER4;
- uint64_t array_hwcaps[] = { h1, h2 };
+ uint64_t array_hwcaps[] = { h1, h2, h3, h4 };
init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps);
/* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
we can read both in a single load later. */
__tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff);
- __tcb.hwcap_extn = 0x0;
+
+ /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that
+ we can read both in a single load later. */
+ __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff);
}
#if IS_IN (rtld)
diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
index e3d758b163c619df..ea2a58ecb1668774 100644
--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
+++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
+ GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3];
+ GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4];
GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
_dl_random = (void *) auxv_values[AT_RANDOM];
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
index ad3692d73839d7a3..e1b14e9eb34ff5cb 100644
--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
@@ -197,6 +197,8 @@ _dl_show_auxv (void)
[AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex },
[AT_RANDOM - 2] = { "RANDOM: 0x", hex },
[AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex },
+ [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex },
+ [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex },
[AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec },
[AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec },
[AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex },
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
index 8e8a5ec2eab7e8c6..a947d62db63965b1 100644
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[])
which are set by __tcb_parse_hwcap_and_convert_at_platform. */
cpu_features->hwcap = hwcaps[0];
cpu_features->hwcap2 = hwcaps[1];
+ cpu_features->hwcap3 = hwcaps[2];
+ cpu_features->hwcap4 = hwcaps[3];
/* Default is to use aligned memory access on optimized function unless
tunables is enable, since for this case user can explicit disable
unaligned optimizations. */
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
index 1294f0b601ebf54f..e9eb6a13c8ab11d7 100644
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
@@ -26,6 +26,8 @@ struct cpu_features
bool use_cached_memopt;
unsigned long int hwcap;
unsigned long int hwcap2;
+ unsigned long int hwcap3;
+ unsigned long int hwcap4;
};
static const char hwcap_names[] = {
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index a4705daf1cdea4de..6a00cd88cd64b992 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv,
case AT_HWCAP2:
_dl_hwcap2 = (unsigned long int) av->a_un.a_val;
break;
+ case AT_HWCAP3:
+ _dl_hwcap3 = (unsigned long int) av->a_un.a_val;
+ break;
+ case AT_HWCAP4:
+ _dl_hwcap4 = (unsigned long int) av->a_un.a_val;
+ break;
case AT_PLATFORM:
_dl_platform = (void *) av->a_un.a_val;
break;

172
glibc-upstream-2.39-9.patch Normal file
View File

@ -0,0 +1,172 @@
commit aad45c8ac30aa1072e54903ce6aead22702f244a
Author: Amrita H S <amritahs@linux.ibm.com>
Date: Tue Mar 19 19:08:47 2024 -0500
powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
The following three changes have been added to provide initial Power11 support.
1. Add the directories to hold Power11 files.
2. Add support to select Power11 libraries based on AT_PLATFORM.
3. Let submachine=power11 be set automatically.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
(cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727)
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
index f8cb343877386402..b36697ba440654be 100644
--- a/sysdeps/powerpc/dl-procinfo.h
+++ b/sysdeps/powerpc/dl-procinfo.h
@@ -38,7 +38,7 @@
#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
+ PPC_FEATURE_HAS_DFP)
-#define _DL_PLATFORMS_COUNT 16
+#define _DL_PLATFORMS_COUNT 17
#define _DL_FIRST_PLATFORM 32
/* Mask to filter out platforms. */
@@ -62,6 +62,7 @@
#define PPC_PLATFORM_POWER8 13
#define PPC_PLATFORM_POWER9 14
#define PPC_PLATFORM_POWER10 15
+#define PPC_PLATFORM_POWER11 16
static inline const char *
__attribute__ ((unused))
@@ -89,6 +90,11 @@ _dl_string_platform (const char *str)
ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10;
str++;
}
+ else if (str[1] == '1')
+ {
+ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11;
+ str++;
+ }
else
return -1;
break;
diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies
new file mode 100644
index 0000000000000000..051cbe0f7911c93c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/power10/fpu
+powerpc/powerpc32/power10
diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000000000..58edb2861d17f504
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
new file mode 100644
index 0000000000000000..c70f0428badbaf14
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc32/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies
new file mode 100644
index 0000000000000000..de481d1c13db695e
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/be/power10/fpu
+powerpc/powerpc64/be/power10
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
new file mode 100644
index 0000000000000000..dff0e13064ce8238
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/fpu
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000000000..c3f259e0097386a5
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
new file mode 100644
index 0000000000000000..9491a394c9519d01
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/be/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies
new file mode 100644
index 0000000000000000..e18182dcc1f4c25f
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc64/le/power10/fpu
+powerpc/powerpc64/le/power10
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
new file mode 100644
index 0000000000000000..e41bd55684dbb5b8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/fpu
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
new file mode 100644
index 0000000000000000..c838d5093140eae3
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/fpu/multiarch
diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
new file mode 100644
index 0000000000000000..687248c3c267cd8c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
@@ -0,0 +1 @@
+powerpc/powerpc64/le/power10/multiarch
diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
index 77465d9133410267..65d3e69303a1c963 100644
--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
+++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
@@ -36,9 +36,11 @@ compute_level (void)
return 9;
if (strcmp (platform, "power10") == 0)
return 10;
+ if (strcmp (platform, "power11") == 0)
+ return 11;
printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
- /* Assume that the new platform supports POWER10. */
- return 10;
+ /* Assume that the new platform supports POWER11. */
+ return 11;
}
static int
diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure
index 4de94089a3f68532..9e5a07ab6d6767cd 100644
--- a/sysdeps/powerpc/preconfigure
+++ b/sysdeps/powerpc/preconfigure
@@ -58,7 +58,7 @@ fi
;;
- a2|970|power[4-9]|power5x|power6+|power10)
+ a2|970|power[4-9]|power5x|power6+|power10|power11)
submachine=${archcpu}
if test ${libc_cv_cc_submachine+y}
then :
diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac
index 6c63bd8257b7e40a..14b6dafd4a895c3b 100644
--- a/sysdeps/powerpc/preconfigure.ac
+++ b/sysdeps/powerpc/preconfigure.ac
@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in
AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
;;
- a2|970|power[[4-9]]|power5x|power6+|power10)
+ a2|970|power[[4-9]]|power5x|power6+|power10|power11)
submachine=${archcpu}
AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
;;

View File

@ -171,7 +171,7 @@ Version: %{glibcversion}
# - It allows using the Release number without the %%dist tag in the dependency
# generator to make the generated requires interchangeable between Rawhide
# and ELN (.elnYY < .fcXX).
%global baserelease 6
%global baserelease 7
Release: %{baserelease}%{?dist}
# Licenses:
@ -288,6 +288,20 @@ Patch27: glibc-upstream-2.39-4.patch
Patch28: glibc-upstream-2.39-5.patch
Patch29: glibc-upstream-2.39-6.patch
Patch30: glibc-upstream-2.39-7.patch
Patch31: glibc-upstream-2.39-8.patch
Patch32: glibc-upstream-2.39-9.patch
Patch33: glibc-upstream-2.39-10.patch
Patch34: glibc-upstream-2.39-11.patch
Patch35: glibc-upstream-2.39-12.patch
Patch36: glibc-upstream-2.39-13.patch
Patch37: glibc-upstream-2.39-14.patch
Patch38: glibc-upstream-2.39-15.patch
Patch39: glibc-upstream-2.39-16.patch
Patch40: glibc-upstream-2.39-17.patch
Patch41: glibc-upstream-2.39-18.patch
Patch42: glibc-upstream-2.39-19.patch
Patch43: glibc-upstream-2.39-20.patch
Patch44: glibc-upstream-2.39-21.patch
##############################################################################
# Continued list of core "glibc" package information:
@ -2464,6 +2478,24 @@ update_gconv_modules_cache ()
%endif
%changelog
* Thu Apr 04 2024 Arjun Shankar <arjun@redhat.com> - 2.39-7
- Sync with upstream branch release/2.39/master,
commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae:
- x86: Expand the comment on when REP STOSB is used on memset
- x86: Do not prefer ERMS for memset on Zen3+
- x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
- Add tst-gnu2-tls2mod1 to test-internal-extras
- elf: Enable TLS descriptor tests on aarch64
- arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
- Ignore undefined symbols for -mtls-dialect=gnu2
- x86-64: Allocate state buffer space for RDI, RSI and RBX
- x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
- x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
- x86-64: Save APX registers in ld.so trampoline
- LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
- powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
- powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
* Tue Mar 26 2024 Florian Weimer <fweimer@redhat.com> - 2.39-6
- Do not generate ELF dependency information for glibc32