Sync with upstream branch release/2.39/master (RHEL-25850, etc.)
Upstream commit: 5d070d12b3a52bc44dd1b71743abc4b6243862ae
Related: RHEL-25850
- x86: Expand the comment on when REP STOSB is used on memset
- x86: Do not prefer ERMS for memset on Zen3+
- x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
Resolves: RHEL-25530
- Add tst-gnu2-tls2mod1 to test-internal-extras
- elf: Enable TLS descriptor tests on aarch64
- arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
- Ignore undefined symbols for -mtls-dialect=gnu2
- x86-64: Allocate state buffer space for RDI, RSI and RBX
- x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
- x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
Resolves: RHEL-29179
- x86-64: Save APX registers in ld.so trampoline
Resolves: RHEL-25045
- LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
- powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
- powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
Resolves: RHEL-24761
Fedora 40 commit: 24af28d49b
This commit is contained in:
parent
4df7ab7cf0
commit
1a997221e3
16
glibc-upstream-2.39-10.patch
Normal file
16
glibc-upstream-2.39-10.patch
Normal file
@ -0,0 +1,16 @@
|
||||
commit 983f34a1252de3ca6f2305c211d86530ea42010e
|
||||
Author: caiyinyu <caiyinyu@loongson.cn>
|
||||
Date: Mon Mar 11 16:07:48 2024 +0800
|
||||
|
||||
LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
|
||||
|
||||
diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c
|
||||
index 9f054852362e2d76..7c0395fbb5afbc58 100644
|
||||
--- a/sysdeps/loongarch/fpu/e_scalbf.c
|
||||
+++ b/sysdeps/loongarch/fpu/e_scalbf.c
|
||||
@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn)
|
||||
|
||||
return x;
|
||||
}
|
||||
-libm_alias_finite (__ieee754_scalb, __scalb)
|
||||
+libm_alias_finite (__ieee754_scalbf, __scalbf)
|
80
glibc-upstream-2.39-11.patch
Normal file
80
glibc-upstream-2.39-11.patch
Normal file
@ -0,0 +1,80 @@
|
||||
commit 7fc8242bf87828c935ac5df5cafb9dc7ab635fd9
|
||||
Author: H.J. Lu <hjl.tools@gmail.com>
|
||||
Date: Fri Feb 16 07:17:10 2024 -0800
|
||||
|
||||
x86-64: Save APX registers in ld.so trampoline
|
||||
|
||||
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
|
||||
ld.so trampoline. This fixes BZ #31371.
|
||||
|
||||
Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
|
||||
be used by i386 _dl_tlsdesc_dynamic.
|
||||
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||||
|
||||
(cherry picked from commit dfb05f8e704edac70db38c4c8ee700769d91a413)
|
||||
|
||||
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
|
||||
index 85d0a8c943cbb218..837fd28734914a1c 100644
|
||||
--- a/sysdeps/x86/sysdep.h
|
||||
+++ b/sysdeps/x86/sysdep.h
|
||||
@@ -21,14 +21,54 @@
|
||||
|
||||
#include <sysdeps/generic/sysdep.h>
|
||||
|
||||
+/* The extended state feature IDs in the state component bitmap. */
|
||||
+#define X86_XSTATE_X87_ID 0
|
||||
+#define X86_XSTATE_SSE_ID 1
|
||||
+#define X86_XSTATE_AVX_ID 2
|
||||
+#define X86_XSTATE_BNDREGS_ID 3
|
||||
+#define X86_XSTATE_BNDCFG_ID 4
|
||||
+#define X86_XSTATE_K_ID 5
|
||||
+#define X86_XSTATE_ZMM_H_ID 6
|
||||
+#define X86_XSTATE_ZMM_ID 7
|
||||
+#define X86_XSTATE_PKRU_ID 9
|
||||
+#define X86_XSTATE_TILECFG_ID 17
|
||||
+#define X86_XSTATE_TILEDATA_ID 18
|
||||
+#define X86_XSTATE_APX_F_ID 19
|
||||
+
|
||||
+#ifdef __x86_64__
|
||||
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
|
||||
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
|
||||
- aligned to 16 bytes for fxsave and 64 bytes for xsave. */
|
||||
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
|
||||
-
|
||||
-/* Save SSE, AVX, AVX512, mask and bound registers. */
|
||||
-#define STATE_SAVE_MASK \
|
||||
- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
|
||||
+ aligned to 16 bytes for fxsave and 64 bytes for xsave.
|
||||
+
|
||||
+ NB: Is is non-zero because of the 128-byte red-zone. Some registers
|
||||
+ are saved on stack without adjusting stack pointer first. When we
|
||||
+ update stack pointer to allocate more space, we need to take the
|
||||
+ red-zone into account. */
|
||||
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
|
||||
+
|
||||
+/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
|
||||
+ registers are mutually exclusive. */
|
||||
+# define STATE_SAVE_MASK \
|
||||
+ ((1 << X86_XSTATE_SSE_ID) \
|
||||
+ | (1 << X86_XSTATE_AVX_ID) \
|
||||
+ | (1 << X86_XSTATE_BNDREGS_ID) \
|
||||
+ | (1 << X86_XSTATE_K_ID) \
|
||||
+ | (1 << X86_XSTATE_ZMM_H_ID) \
|
||||
+ | (1 << X86_XSTATE_ZMM_ID) \
|
||||
+ | (1 << X86_XSTATE_APX_F_ID))
|
||||
+#else
|
||||
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
|
||||
+ doesn't have red-zone, use 0 here. */
|
||||
+# define STATE_SAVE_OFFSET 0
|
||||
+
|
||||
+/* Save SSE, AVX, AXV512, mask and bound registers. */
|
||||
+# define STATE_SAVE_MASK \
|
||||
+ ((1 << X86_XSTATE_SSE_ID) \
|
||||
+ | (1 << X86_XSTATE_AVX_ID) \
|
||||
+ | (1 << X86_XSTATE_BNDREGS_ID) \
|
||||
+ | (1 << X86_XSTATE_K_ID) \
|
||||
+ | (1 << X86_XSTATE_ZMM_H_ID))
|
||||
+#endif
|
||||
|
||||
/* Constants for bits in __x86_string_control: */
|
||||
|
1453
glibc-upstream-2.39-12.patch
Normal file
1453
glibc-upstream-2.39-12.patch
Normal file
File diff suppressed because it is too large
Load Diff
496
glibc-upstream-2.39-13.patch
Normal file
496
glibc-upstream-2.39-13.patch
Normal file
@ -0,0 +1,496 @@
|
||||
commit 853e915fdd6ae6c5f1a7a68d2594ec8dbfef1286
|
||||
Author: H.J. Lu <hjl.tools@gmail.com>
|
||||
Date: Wed Feb 28 12:08:03 2024 -0800
|
||||
|
||||
x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
|
||||
|
||||
_dl_tlsdesc_dynamic should also preserve AMX registers which are
|
||||
caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
|
||||
to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
|
||||
and save it in xsave_state_full_size which is only used by
|
||||
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
|
||||
the AMX part of BZ #31372. Tested on AMX processor.
|
||||
|
||||
AMX test is enabled only for compilers with the fix for
|
||||
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
|
||||
|
||||
GCC 14 and GCC 11/12/13 branches have the bug fix.
|
||||
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
|
||||
|
||||
(cherry picked from commit 9b7091415af47082664717210ac49d51551456ab)
|
||||
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
|
||||
index 4223feb95f6fe2f5..9a1e7aa6461725af 100644
|
||||
--- a/sysdeps/unix/sysv/linux/x86_64/Makefile
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
|
||||
@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
|
||||
$(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
|
||||
cp $< $@
|
||||
endif
|
||||
+
|
||||
+ifeq (yes,$(have-mamx-tile))
|
||||
+tests += \
|
||||
+ tst-gnu2-tls2-amx \
|
||||
+# tests
|
||||
+
|
||||
+modules-names += \
|
||||
+ tst-gnu2-tls2-amx-mod0 \
|
||||
+ tst-gnu2-tls2-amx-mod1 \
|
||||
+ tst-gnu2-tls2-amx-mod2 \
|
||||
+# modules-names
|
||||
+
|
||||
+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
|
||||
+$(objpfx)tst-gnu2-tls2-amx.out: \
|
||||
+ $(objpfx)tst-gnu2-tls2-amx-mod0.so \
|
||||
+ $(objpfx)tst-gnu2-tls2-amx-mod1.so \
|
||||
+ $(objpfx)tst-gnu2-tls2-amx-mod2.so
|
||||
+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
|
||||
+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
|
||||
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
|
||||
+
|
||||
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
|
||||
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
|
||||
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
|
||||
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
|
||||
+endif
|
||||
+
|
||||
endif # $(subdir) == elf
|
||||
|
||||
ifneq ($(enable-cet),no)
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
|
||||
index 2f511321ad3b3ac1..ef4631bf4b2fd9aa 100644
|
||||
--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
|
||||
@@ -20,3 +20,8 @@
|
||||
# define ARCH_SHSTK_SHSTK 0x1
|
||||
# define ARCH_SHSTK_WRSS 0x2
|
||||
#endif
|
||||
+
|
||||
+#ifndef ARCH_GET_XCOMP_PERM
|
||||
+# define ARCH_GET_XCOMP_PERM 0x1022
|
||||
+# define ARCH_REQ_XCOMP_PERM 0x1023
|
||||
+#endif
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..2e0c7b91b7caf3ab
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
|
||||
@@ -0,0 +1,2 @@
|
||||
+#include "tst-gnu2-tls2-amx.h"
|
||||
+#include <tst-gnu2-tls2mod0.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..b8a8ccf1c119d443
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
|
||||
@@ -0,0 +1,2 @@
|
||||
+#include "tst-gnu2-tls2-amx.h"
|
||||
+#include <tst-gnu2-tls2mod1.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..cdf4a8f3635b327c
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
|
||||
@@ -0,0 +1,2 @@
|
||||
+#include "tst-gnu2-tls2-amx.h"
|
||||
+#include <tst-gnu2-tls2mod2.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
|
||||
new file mode 100644
|
||||
index 0000000000000000..ae4dd82556c9b2ef
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
|
||||
@@ -0,0 +1,83 @@
|
||||
+/* Test TLSDESC relocation with AMX.
|
||||
+ Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <stdbool.h>
|
||||
+#include <asm/prctl.h>
|
||||
+#include <support/check.h>
|
||||
+#include "tst-gnu2-tls2-amx.h"
|
||||
+
|
||||
+extern int arch_prctl (int, ...);
|
||||
+
|
||||
+#define X86_XSTATE_TILECFG_ID 17
|
||||
+#define X86_XSTATE_TILEDATA_ID 18
|
||||
+
|
||||
+/* Initialize tile config. */
|
||||
+__attribute__ ((noinline, noclone))
|
||||
+static void
|
||||
+init_tile_config (__tilecfg *tileinfo)
|
||||
+{
|
||||
+ int i;
|
||||
+ tileinfo->palette_id = 1;
|
||||
+ tileinfo->start_row = 0;
|
||||
+
|
||||
+ tileinfo->colsb[0] = MAX_ROWS;
|
||||
+ tileinfo->rows[0] = MAX_ROWS;
|
||||
+
|
||||
+ for (i = 1; i < 4; ++i)
|
||||
+ {
|
||||
+ tileinfo->colsb[i] = MAX_COLS;
|
||||
+ tileinfo->rows[i] = MAX_ROWS;
|
||||
+ }
|
||||
+
|
||||
+ _tile_loadconfig (tileinfo);
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+enable_amx (void)
|
||||
+{
|
||||
+ uint64_t bitmask;
|
||||
+ if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
|
||||
+ return false;
|
||||
+
|
||||
+ if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
|
||||
+ return false;
|
||||
+
|
||||
+ if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Load tile configuration. */
|
||||
+ __tilecfg tile_data = { 0 };
|
||||
+ init_tile_config (&tile_data);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* Zero tile register 2. PREPARE_MALLOC is defined to this so that a
|
||||
+ caller-saved register is clobbered in malloc, to verify that the
|
||||
+ implicit TLSDESC call won't change caller-saved registers. */
|
||||
+static void
|
||||
+clear_tile_register (void)
|
||||
+{
|
||||
+ _tile_zero (2);
|
||||
+}
|
||||
+
|
||||
+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
|
||||
+#define IS_SUPPORTED() enable_amx ()
|
||||
+#define PREPARE_MALLOC() clear_tile_register ()
|
||||
+
|
||||
+#include <elf/tst-gnu2-tls2.c>
|
||||
diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
|
||||
new file mode 100644
|
||||
index 0000000000000000..1845a3caba43a0f1
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
|
||||
@@ -0,0 +1,63 @@
|
||||
+/* Test TLSDESC relocation with AMX.
|
||||
+ Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+#include <string.h>
|
||||
+#include <x86intrin.h>
|
||||
+#include <support/check.h>
|
||||
+
|
||||
+#define MAX_ROWS 16
|
||||
+#define MAX_COLS 64
|
||||
+#define MAX 1024
|
||||
+#define STRIDE 64
|
||||
+
|
||||
+typedef struct __tile_config
|
||||
+{
|
||||
+ uint8_t palette_id;
|
||||
+ uint8_t start_row;
|
||||
+ uint8_t reserved_0[14];
|
||||
+ uint16_t colsb[16];
|
||||
+ uint8_t rows[16];
|
||||
+} __tilecfg __attribute__ ((aligned (64)));
|
||||
+
|
||||
+/* Fill the first MAX_ROWS * MAX_COLS bytes of BUF with VALUE. */
|
||||
+static inline void
|
||||
+init_buffer (int8_t *buf, int8_t value)
|
||||
+{
|
||||
+ int rows, colsb, i, j;
|
||||
+ rows = MAX_ROWS;
|
||||
+ colsb = MAX_COLS;
|
||||
+
|
||||
+ for (i = 0; i < rows; i++)
|
||||
+ for (j = 0; j < colsb; j++)
|
||||
+ buf[i * colsb + j] = value;
|
||||
+}
|
||||
+
|
||||
+#define BEFORE_TLSDESC_CALL() \
|
||||
+ int8_t src[MAX]; \
|
||||
+ int8_t res[MAX]; \
|
||||
+ /* Initialize src with data */ \
|
||||
+ init_buffer (src, 2); \
|
||||
+ /* Load tile rows from memory. */ \
|
||||
+ _tile_loadd (2, src, STRIDE);
|
||||
+
|
||||
+#define AFTER_TLSDESC_CALL() \
|
||||
+ /* Store the tile data to memory. */ \
|
||||
+ _tile_stored (2, res, STRIDE); \
|
||||
+ _tile_release (); \
|
||||
+ TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
|
||||
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
|
||||
index 6a8fd298137b7f23..21fc88d6510840e6 100644
|
||||
--- a/sysdeps/x86/cpu-features-offsets.sym
|
||||
+++ b/sysdeps/x86/cpu-features-offsets.sym
|
||||
@@ -3,3 +3,4 @@
|
||||
#include <ldsodefs.h>
|
||||
|
||||
XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
|
||||
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
|
||||
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
||||
index 835113b42f924b83..d71e8d3d2e0e49f9 100644
|
||||
--- a/sysdeps/x86/cpu-features.c
|
||||
+++ b/sysdeps/x86/cpu-features.c
|
||||
@@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
|
||||
__cpuid_count (0xd, 0, eax, ebx, ecx, edx);
|
||||
if (ebx != 0)
|
||||
{
|
||||
+ /* NB: On AMX capable processors, ebx always includes AMX
|
||||
+ states. */
|
||||
unsigned int xsave_state_full_size
|
||||
= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
|
||||
|
||||
@@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
|
||||
{
|
||||
unsigned int xstate_comp_offsets[32];
|
||||
unsigned int xstate_comp_sizes[32];
|
||||
+#ifdef __x86_64__
|
||||
+ unsigned int xstate_amx_comp_offsets[32];
|
||||
+ unsigned int xstate_amx_comp_sizes[32];
|
||||
+ unsigned int amx_ecx;
|
||||
+#endif
|
||||
unsigned int i;
|
||||
|
||||
xstate_comp_offsets[0] = 0;
|
||||
@@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
|
||||
xstate_comp_offsets[2] = 576;
|
||||
xstate_comp_sizes[0] = 160;
|
||||
xstate_comp_sizes[1] = 256;
|
||||
+#ifdef __x86_64__
|
||||
+ xstate_amx_comp_offsets[0] = 0;
|
||||
+ xstate_amx_comp_offsets[1] = 160;
|
||||
+ xstate_amx_comp_offsets[2] = 576;
|
||||
+ xstate_amx_comp_sizes[0] = 160;
|
||||
+ xstate_amx_comp_sizes[1] = 256;
|
||||
+#endif
|
||||
|
||||
for (i = 2; i < 32; i++)
|
||||
{
|
||||
- if ((STATE_SAVE_MASK & (1 << i)) != 0)
|
||||
+ if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
|
||||
{
|
||||
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
|
||||
- xstate_comp_sizes[i] = eax;
|
||||
+#ifdef __x86_64__
|
||||
+ /* Include this in xsave_state_full_size. */
|
||||
+ amx_ecx = ecx;
|
||||
+ xstate_amx_comp_sizes[i] = eax;
|
||||
+ if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
|
||||
+ {
|
||||
+ /* Exclude this from xsave_state_size. */
|
||||
+ ecx = 0;
|
||||
+ xstate_comp_sizes[i] = 0;
|
||||
+ }
|
||||
+ else
|
||||
+#endif
|
||||
+ xstate_comp_sizes[i] = eax;
|
||||
}
|
||||
else
|
||||
{
|
||||
+#ifdef __x86_64__
|
||||
+ amx_ecx = 0;
|
||||
+ xstate_amx_comp_sizes[i] = 0;
|
||||
+#endif
|
||||
ecx = 0;
|
||||
xstate_comp_sizes[i] = 0;
|
||||
}
|
||||
@@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
|
||||
if ((ecx & (1 << 1)) != 0)
|
||||
xstate_comp_offsets[i]
|
||||
= ALIGN_UP (xstate_comp_offsets[i], 64);
|
||||
+#ifdef __x86_64__
|
||||
+ xstate_amx_comp_offsets[i]
|
||||
+ = (xstate_amx_comp_offsets[i - 1]
|
||||
+ + xstate_amx_comp_sizes[i - 1]);
|
||||
+ if ((amx_ecx & (1 << 1)) != 0)
|
||||
+ xstate_amx_comp_offsets[i]
|
||||
+ = ALIGN_UP (xstate_amx_comp_offsets[i],
|
||||
+ 64);
|
||||
+#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
|
||||
= xstate_comp_offsets[31] + xstate_comp_sizes[31];
|
||||
if (size)
|
||||
{
|
||||
+#ifdef __x86_64__
|
||||
+ unsigned int amx_size
|
||||
+ = (xstate_amx_comp_offsets[31]
|
||||
+ + xstate_amx_comp_sizes[31]);
|
||||
+ amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
|
||||
+ 64);
|
||||
+ /* Set xsave_state_full_size to the compact AMX
|
||||
+ state size for XSAVEC. NB: xsave_state_full_size
|
||||
+ is only used in _dl_tlsdesc_dynamic_xsave and
|
||||
+ _dl_tlsdesc_dynamic_xsavec. */
|
||||
+ cpu_features->xsave_state_full_size = amx_size;
|
||||
+#endif
|
||||
cpu_features->xsave_state_size
|
||||
= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
|
||||
CPU_FEATURE_SET (cpu_features, XSAVEC);
|
||||
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
|
||||
index b9bf3115b616f05f..cd7bd27cf35959fd 100644
|
||||
--- a/sysdeps/x86/include/cpu-features.h
|
||||
+++ b/sysdeps/x86/include/cpu-features.h
|
||||
@@ -934,6 +934,8 @@ struct cpu_features
|
||||
/* The full state size for XSAVE when XSAVEC is disabled by
|
||||
|
||||
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
|
||||
+
|
||||
+ and the AMX state size when XSAVEC is available.
|
||||
*/
|
||||
unsigned int xsave_state_full_size;
|
||||
/* Data cache size for use in memory and string routines, typically
|
||||
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
|
||||
index 485cad9c0283b334..db8e576e91767db5 100644
|
||||
--- a/sysdeps/x86/sysdep.h
|
||||
+++ b/sysdeps/x86/sysdep.h
|
||||
@@ -56,6 +56,14 @@
|
||||
| (1 << X86_XSTATE_ZMM_H_ID) \
|
||||
| (1 << X86_XSTATE_ZMM_ID) \
|
||||
| (1 << X86_XSTATE_APX_F_ID))
|
||||
+
|
||||
+/* AMX state mask. */
|
||||
+# define AMX_STATE_SAVE_MASK \
|
||||
+ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
|
||||
+
|
||||
+/* States to be included in xsave_state_full_size. */
|
||||
+# define FULL_STATE_SAVE_MASK \
|
||||
+ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
|
||||
#else
|
||||
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
|
||||
doesn't have red-zone, use 0 here. */
|
||||
@@ -68,13 +76,17 @@
|
||||
| (1 << X86_XSTATE_BNDREGS_ID) \
|
||||
| (1 << X86_XSTATE_K_ID) \
|
||||
| (1 << X86_XSTATE_ZMM_H_ID))
|
||||
+
|
||||
+/* States to be included in xsave_state_size. */
|
||||
+# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
|
||||
#endif
|
||||
|
||||
/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
|
||||
- Compiler assumes that all registers, including x87 FPU stack registers,
|
||||
- are unchanged after CALL, except for EFLAGS and RAX/EAX. */
|
||||
+ Compiler assumes that all registers, including AMX and x87 FPU
|
||||
+ stack registers, are unchanged after CALL, except for EFLAGS and
|
||||
+ RAX/EAX. */
|
||||
#define TLSDESC_CALL_STATE_SAVE_MASK \
|
||||
- (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
|
||||
+ (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
|
||||
|
||||
/* Constants for bits in __x86_string_control: */
|
||||
|
||||
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
|
||||
index 418cc4a9b862f7e0..04a534fa126a7bf7 100755
|
||||
--- a/sysdeps/x86_64/configure
|
||||
+++ b/sysdeps/x86_64/configure
|
||||
@@ -134,6 +134,34 @@ fi
|
||||
config_vars="$config_vars
|
||||
enable-cet = $enable_cet"
|
||||
|
||||
+# Check if -mamx-tile works properly.
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
|
||||
+printf %s "checking whether -mamx-tile works properly... " >&6; }
|
||||
+if test ${libc_cv_x86_have_amx_tile+y}
|
||||
+then :
|
||||
+ printf %s "(cached) " >&6
|
||||
+else $as_nop
|
||||
+ cat > conftest.c <<EOF
|
||||
+#include <x86intrin.h>
|
||||
+EOF
|
||||
+ libc_cv_x86_have_amx_tile=no
|
||||
+ if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
|
||||
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
+ (eval $ac_try) 2>&5
|
||||
+ ac_status=$?
|
||||
+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
+ test $ac_status = 0; }; }; then
|
||||
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
|
||||
+ libc_cv_x86_have_amx_tile=yes
|
||||
+ fi
|
||||
+ fi
|
||||
+ rm -rf conftest*
|
||||
+fi
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
|
||||
+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
|
||||
+config_vars="$config_vars
|
||||
+have-mamx-tile = $libc_cv_x86_have_amx_tile"
|
||||
+
|
||||
test -n "$critic_missing" && as_fn_error $? "
|
||||
*** $critic_missing" "$LINENO" 5
|
||||
|
||||
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
|
||||
index d1f803c02ee67fc5..c714c47351e70390 100644
|
||||
--- a/sysdeps/x86_64/configure.ac
|
||||
+++ b/sysdeps/x86_64/configure.ac
|
||||
@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
|
||||
fi
|
||||
LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
|
||||
|
||||
+# Check if -mamx-tile works properly.
|
||||
+AC_CACHE_CHECK(whether -mamx-tile works properly,
|
||||
+ libc_cv_x86_have_amx_tile, [dnl
|
||||
+cat > conftest.c <<EOF
|
||||
+#include <x86intrin.h>
|
||||
+EOF
|
||||
+ libc_cv_x86_have_amx_tile=no
|
||||
+ if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
|
||||
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
|
||||
+ libc_cv_x86_have_amx_tile=yes
|
||||
+ fi
|
||||
+ fi
|
||||
+ rm -rf conftest*])
|
||||
+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
|
||||
+
|
||||
test -n "$critic_missing" && AC_MSG_ERROR([
|
||||
*** $critic_missing])
|
||||
diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
|
||||
index 0c2e8d5320d0bd26..9f02cfc3eb297ed2 100644
|
||||
--- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
|
||||
+++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
|
||||
@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
|
||||
# endif
|
||||
#else
|
||||
/* Allocate stack space of the required size to save the state. */
|
||||
- sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
|
||||
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
|
||||
#endif
|
||||
/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
|
||||
r10 and r11. */
|
250
glibc-upstream-2.39-14.patch
Normal file
250
glibc-upstream-2.39-14.patch
Normal file
@ -0,0 +1,250 @@
|
||||
commit 354cabcb2634abe16da7a2ba5e648aac1204b58e
|
||||
Author: H.J. Lu <hjl.tools@gmail.com>
|
||||
Date: Mon Mar 18 06:40:16 2024 -0700
|
||||
|
||||
x86-64: Allocate state buffer space for RDI, RSI and RBX
|
||||
|
||||
_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
|
||||
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define
|
||||
TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
|
||||
to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
|
||||
STATE_SAVE_OFFSET(%rsp).
|
||||
|
||||
+==================+<- stack frame start aligned at 8 or 16 bytes
|
||||
| |<- RDI saved in the red zone
|
||||
| |<- RSI saved in the red zone
|
||||
| |<- RBX saved in the red zone
|
||||
| |<- paddings for stack realignment of 64 bytes
|
||||
|------------------|<- xsave buffer end aligned at 64 bytes
|
||||
| |<-
|
||||
| |<-
|
||||
| |<-
|
||||
|------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|
||||
| |<- 8-byte padding for 64-byte alignment
|
||||
| |<- 8-byte padding for 64-byte alignment
|
||||
| |<- R11
|
||||
| |<- R10
|
||||
| |<- R9
|
||||
| |<- R8
|
||||
| |<- RDX
|
||||
| |<- RCX
|
||||
+==================+<- RSP aligned at 64 bytes
|
||||
|
||||
Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
|
||||
for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
|
||||
and RBX are saved onto stack without adjusting stack pointer first, using
|
||||
the red-zone. This fixes BZ #31501.
|
||||
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
|
||||
|
||||
(cherry picked from commit 717ebfa85c8240d32d0d19d86a484c31c55c9617)
|
||||
|
||||
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
||||
index d71e8d3d2e0e49f9..6fe1b728c607f39e 100644
|
||||
--- a/sysdeps/x86/cpu-features.c
|
||||
+++ b/sysdeps/x86/cpu-features.c
|
||||
@@ -310,7 +310,7 @@ update_active (struct cpu_features *cpu_features)
|
||||
/* NB: On AMX capable processors, ebx always includes AMX
|
||||
states. */
|
||||
unsigned int xsave_state_full_size
|
||||
- = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
|
||||
+ = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
|
||||
|
||||
cpu_features->xsave_state_size
|
||||
= xsave_state_full_size;
|
||||
@@ -400,8 +400,10 @@ update_active (struct cpu_features *cpu_features)
|
||||
unsigned int amx_size
|
||||
= (xstate_amx_comp_offsets[31]
|
||||
+ xstate_amx_comp_sizes[31]);
|
||||
- amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
|
||||
- 64);
|
||||
+ amx_size
|
||||
+ = ALIGN_UP ((amx_size
|
||||
+ + TLSDESC_CALL_REGISTER_SAVE_AREA),
|
||||
+ 64);
|
||||
/* Set xsave_state_full_size to the compact AMX
|
||||
state size for XSAVEC. NB: xsave_state_full_size
|
||||
is only used in _dl_tlsdesc_dynamic_xsave and
|
||||
@@ -409,7 +411,8 @@ update_active (struct cpu_features *cpu_features)
|
||||
cpu_features->xsave_state_full_size = amx_size;
|
||||
#endif
|
||||
cpu_features->xsave_state_size
|
||||
- = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
|
||||
+ = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
|
||||
+ 64);
|
||||
CPU_FEATURE_SET (cpu_features, XSAVEC);
|
||||
}
|
||||
}
|
||||
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
|
||||
index db8e576e91767db5..7359149e17ccf341 100644
|
||||
--- a/sysdeps/x86/sysdep.h
|
||||
+++ b/sysdeps/x86/sysdep.h
|
||||
@@ -38,14 +38,59 @@
|
||||
#ifdef __x86_64__
|
||||
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
|
||||
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
|
||||
- aligned to 16 bytes for fxsave and 64 bytes for xsave.
|
||||
-
|
||||
- NB: Is is non-zero because of the 128-byte red-zone. Some registers
|
||||
- are saved on stack without adjusting stack pointer first. When we
|
||||
- update stack pointer to allocate more space, we need to take the
|
||||
- red-zone into account. */
|
||||
+ aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero
|
||||
+ because MOV, instead of PUSH, is used to save registers onto stack.
|
||||
+
|
||||
+ +==================+<- stack frame start aligned at 8 or 16 bytes
|
||||
+ | |<- paddings for stack realignment of 64 bytes
|
||||
+ |------------------|<- xsave buffer end aligned at 64 bytes
|
||||
+ | |<-
|
||||
+ | |<-
|
||||
+ | |<-
|
||||
+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|
||||
+ | |<- 8-byte padding for 64-byte alignment
|
||||
+ | |<- R9
|
||||
+ | |<- R8
|
||||
+ | |<- RDI
|
||||
+ | |<- RSI
|
||||
+ | |<- RDX
|
||||
+ | |<- RCX
|
||||
+ | |<- RAX
|
||||
+ +==================+<- RSP aligned at 64 bytes
|
||||
+
|
||||
+ */
|
||||
# define STATE_SAVE_OFFSET (8 * 7 + 8)
|
||||
|
||||
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
|
||||
+ stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
|
||||
+ R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved
|
||||
+ RDI, RSI and RBX values on stack by xsave.
|
||||
+
|
||||
+ +==================+<- stack frame start aligned at 8 or 16 bytes
|
||||
+ | |<- RDI saved in the red zone
|
||||
+ | |<- RSI saved in the red zone
|
||||
+ | |<- RBX saved in the red zone
|
||||
+ | |<- paddings for stack realignment of 64 bytes
|
||||
+ |------------------|<- xsave buffer end aligned at 64 bytes
|
||||
+ | |<-
|
||||
+ | |<-
|
||||
+ | |<-
|
||||
+ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|
||||
+ | |<- 8-byte padding for 64-byte alignment
|
||||
+ | |<- 8-byte padding for 64-byte alignment
|
||||
+ | |<- R11
|
||||
+ | |<- R10
|
||||
+ | |<- R9
|
||||
+ | |<- R8
|
||||
+ | |<- RDX
|
||||
+ | |<- RCX
|
||||
+ +==================+<- RSP aligned at 64 bytes
|
||||
+
|
||||
+ Define the total register save area size for all integer registers by
|
||||
+ adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
|
||||
+ stack without adjusting stack pointer first, using the red-zone. */
|
||||
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
|
||||
+
|
||||
/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
|
||||
registers are mutually exclusive. */
|
||||
# define STATE_SAVE_MASK \
|
||||
@@ -66,8 +111,9 @@
|
||||
(STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
|
||||
#else
|
||||
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
|
||||
- doesn't have red-zone, use 0 here. */
|
||||
+ uses PUSH to save registers onto stack, use 0 here. */
|
||||
# define STATE_SAVE_OFFSET 0
|
||||
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
|
||||
|
||||
/* Save SSE, AVX, AXV512, mask and bound registers. */
|
||||
# define STATE_SAVE_MASK \
|
||||
diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
|
||||
new file mode 100644
|
||||
index 0000000000000000..1d636669ba255724
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
|
||||
@@ -0,0 +1,87 @@
|
||||
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
|
||||
+ Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <sysdep.h>
|
||||
+
|
||||
+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
|
||||
+ to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size
|
||||
+ is 960 bytes and this test didn't fail. It may be due to the unused
|
||||
+ last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and
|
||||
+ this test might fail without the fix. */
|
||||
+#ifndef OFFSET
|
||||
+# define OFFSET 40
|
||||
+#endif
|
||||
+
|
||||
+ .text
|
||||
+ .p2align 4
|
||||
+ .globl apply_tls
|
||||
+ .type apply_tls, @function
|
||||
+apply_tls:
|
||||
+ cfi_startproc
|
||||
+ _CET_ENDBR
|
||||
+ pushq %rbp
|
||||
+ cfi_def_cfa_offset (16)
|
||||
+ cfi_offset (6, -16)
|
||||
+ movdqu (%RDI_LP), %xmm0
|
||||
+ lea tls_var1@TLSDESC(%rip), %RAX_LP
|
||||
+ mov %RSP_LP, %RBP_LP
|
||||
+ cfi_def_cfa_register (6)
|
||||
+ /* Align stack to 64 bytes. */
|
||||
+ and $-64, %RSP_LP
|
||||
+ sub $OFFSET, %RSP_LP
|
||||
+ pushq %rbx
|
||||
+ /* Set %ebx to 0xbadbeef. */
|
||||
+ movl $0xbadbeef, %ebx
|
||||
+ movl $0xbadbeef, %esi
|
||||
+ movq %rdi, saved_rdi(%rip)
|
||||
+ movq %rsi, saved_rsi(%rip)
|
||||
+ call *tls_var1@TLSCALL(%RAX_LP)
|
||||
+ /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */
|
||||
+ cmpq saved_rdi(%rip), %rdi
|
||||
+ jne L(hlt)
|
||||
+ cmpq saved_rsi(%rip), %rsi
|
||||
+ jne L(hlt)
|
||||
+ cmpl $0xbadbeef, %ebx
|
||||
+ jne L(hlt)
|
||||
+ add %fs:0, %RAX_LP
|
||||
+ movups %xmm0, 32(%RAX_LP)
|
||||
+ movdqu 16(%RDI_LP), %xmm1
|
||||
+ mov %RAX_LP, %RBX_LP
|
||||
+ movups %xmm1, 48(%RAX_LP)
|
||||
+ lea 32(%RBX_LP), %RAX_LP
|
||||
+ pop %rbx
|
||||
+ leave
|
||||
+ cfi_def_cfa (7, 8)
|
||||
+ ret
|
||||
+L(hlt):
|
||||
+ hlt
|
||||
+ cfi_endproc
|
||||
+ .size apply_tls, .-apply_tls
|
||||
+ .hidden tls_var1
|
||||
+ .globl tls_var1
|
||||
+ .section .tbss,"awT",@nobits
|
||||
+ .align 16
|
||||
+ .type tls_var1, @object
|
||||
+ .size tls_var1, 3200
|
||||
+tls_var1:
|
||||
+ .zero 3200
|
||||
+ .local saved_rdi
|
||||
+ .comm saved_rdi,8,8
|
||||
+ .local saved_rsi
|
||||
+ .comm saved_rsi,8,8
|
||||
+ .section .note.GNU-stack,"",@progbits
|
38
glibc-upstream-2.39-15.patch
Normal file
38
glibc-upstream-2.39-15.patch
Normal file
@ -0,0 +1,38 @@
|
||||
commit 15aebdbada54098787715448c94701f17033fc92
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Tue Mar 12 13:21:18 2024 -0300
|
||||
|
||||
Ignore undefined symbols for -mtls-dialect=gnu2
|
||||
|
||||
So it does not fail for arm config that defaults to -mtp=soft (which
|
||||
issues a call to __aeabi_read_tp).
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
(cherry picked from commit 968b0ca9440040a2b31248a572891f0e55c1ab10)
|
||||
|
||||
diff --git a/configure b/configure
|
||||
index 59ff1e415dda4fbf..117b48a421792eda 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -7020,7 +7020,7 @@ void foo (void)
|
||||
}
|
||||
EOF
|
||||
if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
|
||||
- conftest.c -o conftest 1>&5'
|
||||
+ -shared conftest.c -o conftest 1>&5'
|
||||
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
(eval $ac_try) 2>&5
|
||||
ac_status=$?
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index 65799e56852a5356..19b88a47a52508a1 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -1297,7 +1297,7 @@ void foo (void)
|
||||
}
|
||||
EOF
|
||||
if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
|
||||
- conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
|
||||
+ -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
|
||||
then
|
||||
libc_cv_mtls_dialect_gnu2=yes
|
||||
else
|
446
glibc-upstream-2.39-16.patch
Normal file
446
glibc-upstream-2.39-16.patch
Normal file
@ -0,0 +1,446 @@
|
||||
commit a8ba52bde58c69f2b31da62ad2311f119adf6cb9
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Tue Mar 12 13:21:19 2024 -0300
|
||||
|
||||
arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
|
||||
|
||||
ARM _dl_tlsdesc_dynamic slow path has two issues:
|
||||
|
||||
* The ip/r12 is defined by AAPCS as a scratch register, and gcc is
|
||||
used to save the stack pointer before on some function calls. So it
|
||||
should also be saved/restored as well. It fixes the tst-gnu2-tls2.
|
||||
|
||||
* None of the possible VFP registers are saved/restored. ARM has the
|
||||
additional complexity to have different VFP bank sizes (depending on
|
||||
VFP support by the chip).
|
||||
|
||||
The tst-gnu2-tls2 test is extended to check for VFP registers, although
|
||||
only for hardfp builds. Different than setcontext, _dl_tlsdesc_dynamic
|
||||
does not have HWCAP_ARM_IWMMXT (I don't have a way to properly test
|
||||
it and it is almost a decade since newer hardware was released).
|
||||
|
||||
With this patch there is no need to mark tst-gnu2-tls2 as XFAIL.
|
||||
|
||||
Checked on arm-linux-gnueabihf.
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
(cherry picked from commit 64c7e344289ed085517c2227d8e3b06388242c13)
|
||||
|
||||
diff --git a/config.h.in b/config.h.in
|
||||
index 44a34072a47aa008..4d33c63a841d3d6d 100644
|
||||
--- a/config.h.in
|
||||
+++ b/config.h.in
|
||||
@@ -141,6 +141,9 @@
|
||||
/* LOONGARCH floating-point ABI for ld.so. */
|
||||
#undef LOONGARCH_ABI_FRLEN
|
||||
|
||||
+/* Define whether ARM used hard-float and support VFPvX-D32. */
|
||||
+#undef HAVE_ARM_PCS_VFP_D32
|
||||
+
|
||||
/* Linux specific: minimum supported kernel version. */
|
||||
#undef __LINUX_KERNEL_VERSION
|
||||
|
||||
diff --git a/elf/Makefile b/elf/Makefile
|
||||
index c5c37a9147e69d83..030db4d207d3491e 100644
|
||||
--- a/elf/Makefile
|
||||
+++ b/elf/Makefile
|
||||
@@ -3056,10 +3056,6 @@ $(objpfx)tst-gnu2-tls2.out: \
|
||||
$(objpfx)tst-gnu2-tls2mod2.so
|
||||
|
||||
ifeq (yes,$(have-mtls-dialect-gnu2))
|
||||
-# This test fails if dl_tlsdesc_dynamic doesn't preserve all caller-saved
|
||||
-# registers. See https://sourceware.org/bugzilla/show_bug.cgi?id=31372
|
||||
-test-xfail-tst-gnu2-tls2 = yes
|
||||
-
|
||||
CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
|
||||
CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
|
||||
CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
|
||||
diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h
|
||||
index 77964a57a352e6a4..1ade8151e200af68 100644
|
||||
--- a/elf/tst-gnu2-tls2.h
|
||||
+++ b/elf/tst-gnu2-tls2.h
|
||||
@@ -27,6 +27,10 @@ extern struct tls *apply_tls (struct tls *);
|
||||
|
||||
/* An architecture can define them to verify that clobber caller-saved
|
||||
registers aren't changed by the implicit TLSDESC call. */
|
||||
+#ifndef INIT_TLSDESC_CALL
|
||||
+# define INIT_TLSDESC_CALL()
|
||||
+#endif
|
||||
+
|
||||
#ifndef BEFORE_TLSDESC_CALL
|
||||
# define BEFORE_TLSDESC_CALL()
|
||||
#endif
|
||||
diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c
|
||||
index 45556a0e173922cc..3fe3c142777abe04 100644
|
||||
--- a/elf/tst-gnu2-tls2mod0.c
|
||||
+++ b/elf/tst-gnu2-tls2mod0.c
|
||||
@@ -16,13 +16,14 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
-#include "tst-gnu2-tls2.h"
|
||||
+#include <tst-gnu2-tls2.h>
|
||||
|
||||
__thread struct tls tls_var0 __attribute__ ((visibility ("hidden")));
|
||||
|
||||
struct tls *
|
||||
apply_tls (struct tls *p)
|
||||
{
|
||||
+ INIT_TLSDESC_CALL ();
|
||||
BEFORE_TLSDESC_CALL ();
|
||||
tls_var0 = *p;
|
||||
struct tls *ret = &tls_var0;
|
||||
diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c
|
||||
index e10b9dbc0a7573c7..e2105384689e2d2e 100644
|
||||
--- a/elf/tst-gnu2-tls2mod1.c
|
||||
+++ b/elf/tst-gnu2-tls2mod1.c
|
||||
@@ -16,13 +16,14 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
-#include "tst-gnu2-tls2.h"
|
||||
+#include <tst-gnu2-tls2.h>
|
||||
|
||||
__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden")));
|
||||
|
||||
struct tls *
|
||||
apply_tls (struct tls *p)
|
||||
{
|
||||
+ INIT_TLSDESC_CALL ();
|
||||
BEFORE_TLSDESC_CALL ();
|
||||
tls_var1[1] = *p;
|
||||
struct tls *ret = &tls_var1[1];
|
||||
diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c
|
||||
index 141af51e55b8bf34..6d3031dc5fbc1041 100644
|
||||
--- a/elf/tst-gnu2-tls2mod2.c
|
||||
+++ b/elf/tst-gnu2-tls2mod2.c
|
||||
@@ -16,13 +16,14 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
-#include "tst-gnu2-tls2.h"
|
||||
+#include <tst-gnu2-tls2.h>
|
||||
|
||||
__thread struct tls tls_var2 __attribute__ ((visibility ("hidden")));
|
||||
|
||||
struct tls *
|
||||
apply_tls (struct tls *p)
|
||||
{
|
||||
+ INIT_TLSDESC_CALL ();
|
||||
BEFORE_TLSDESC_CALL ();
|
||||
tls_var2 = *p;
|
||||
struct tls *ret = &tls_var2;
|
||||
diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
|
||||
index 35e2918922300956..4ef4d46cbd5384e9 100644
|
||||
--- a/sysdeps/arm/configure
|
||||
+++ b/sysdeps/arm/configure
|
||||
@@ -187,6 +187,38 @@ else
|
||||
default-abi = soft"
|
||||
fi
|
||||
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5
|
||||
+printf %s "checking whether VFP supports 32 registers... " >&6; }
|
||||
+if test ${libc_cv_arm_pcs_vfp_d32+y}
|
||||
+then :
|
||||
+ printf %s "(cached) " >&6
|
||||
+else $as_nop
|
||||
+
|
||||
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
+/* end confdefs.h. */
|
||||
+
|
||||
+void foo (void)
|
||||
+{
|
||||
+ asm volatile ("vldr d16,=17" : : : "d16");
|
||||
+}
|
||||
+
|
||||
+_ACEOF
|
||||
+if ac_fn_c_try_compile "$LINENO"
|
||||
+then :
|
||||
+ libc_cv_arm_pcs_vfp_d32=yes
|
||||
+else $as_nop
|
||||
+ libc_cv_arm_pcs_vfp_d32=no
|
||||
+fi
|
||||
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
|
||||
+fi
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5
|
||||
+printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; }
|
||||
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
|
||||
+then
|
||||
+ printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h
|
||||
+
|
||||
+fi
|
||||
+
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
|
||||
printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
|
||||
if test ${libc_cv_arm_pcrel_movw+y}
|
||||
diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
|
||||
index 5172e30bbe79f995..cd00ddc9d9ade5d7 100644
|
||||
--- a/sysdeps/arm/configure.ac
|
||||
+++ b/sysdeps/arm/configure.ac
|
||||
@@ -21,6 +21,21 @@ else
|
||||
LIBC_CONFIG_VAR([default-abi], [soft])
|
||||
fi
|
||||
|
||||
+AC_CACHE_CHECK([whether VFP supports 32 registers],
|
||||
+ libc_cv_arm_pcs_vfp_d32, [
|
||||
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
+void foo (void)
|
||||
+{
|
||||
+ asm volatile ("vldr d16,=17" : : : "d16");
|
||||
+}
|
||||
+]])],
|
||||
+ [libc_cv_arm_pcs_vfp_d32=yes],
|
||||
+ [libc_cv_arm_pcs_vfp_d32=no])])
|
||||
+if test "$libc_cv_arm_pcs_vfp_d32" = yes ;
|
||||
+then
|
||||
+ AC_DEFINE(HAVE_ARM_PCS_VFP_D32)
|
||||
+fi
|
||||
+
|
||||
AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
|
||||
libc_cv_arm_pcrel_movw, [
|
||||
cat > conftest.s <<\EOF
|
||||
diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S
|
||||
index 764c56e70f046b03..ada106521da8971a 100644
|
||||
--- a/sysdeps/arm/dl-tlsdesc.S
|
||||
+++ b/sysdeps/arm/dl-tlsdesc.S
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <sysdep.h>
|
||||
#include <arm-features.h>
|
||||
#include <tls.h>
|
||||
+#include <rtld-global-offsets.h>
|
||||
#include "tlsdesc.h"
|
||||
|
||||
.text
|
||||
@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp)
|
||||
.align 2
|
||||
_dl_tlsdesc_dynamic:
|
||||
/* Our calling convention is to clobber r0, r1 and the processor
|
||||
- flags. All others that are modified must be saved */
|
||||
- eabi_save ({r2,r3,r4,lr})
|
||||
- push {r2,r3,r4,lr}
|
||||
- cfi_adjust_cfa_offset (16)
|
||||
+ flags. All others that are modified must be saved. r5 is
|
||||
+ used as the hwcap value to avoid reload after __tls_get_addr
|
||||
+ call. If required we will save the vector register on the slow
|
||||
+ path. */
|
||||
+ eabi_save ({r2,r3,r4,r5,ip,lr})
|
||||
+ push {r2,r3,r4,r5,ip,lr}
|
||||
+ cfi_adjust_cfa_offset (24)
|
||||
cfi_rel_offset (r2,0)
|
||||
cfi_rel_offset (r3,4)
|
||||
cfi_rel_offset (r4,8)
|
||||
- cfi_rel_offset (lr,12)
|
||||
+ cfi_rel_offset (r5,12)
|
||||
+ cfi_rel_offset (ip,16)
|
||||
+ cfi_rel_offset (lr,20)
|
||||
+
|
||||
ldr r1, [r0] /* td */
|
||||
GET_TLS (lr)
|
||||
mov r4, r0 /* r4 = tp */
|
||||
@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic:
|
||||
rsbne r0, r4, r3
|
||||
bne 2f
|
||||
1: mov r0, r1
|
||||
+
|
||||
+ /* Load the hwcap to check for vector support. */
|
||||
+ ldr r2, 3f
|
||||
+ ldr r1, .Lrtld_global_ro
|
||||
+0: add r2, pc, r2
|
||||
+ ldr r2, [r2, r1]
|
||||
+ ldr r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
|
||||
+
|
||||
+#ifdef __SOFTFP__
|
||||
+ tst r5, #HWCAP_ARM_VFP
|
||||
+ beq .Lno_vfp
|
||||
+#endif
|
||||
+
|
||||
+ /* Store the VFP registers. Don't use VFP instructions directly
|
||||
+ because this code is used in non-VFP multilibs. */
|
||||
+#define VFP_STACK_REQ (32*8 + 8)
|
||||
+ sub sp, sp, VFP_STACK_REQ
|
||||
+ cfi_adjust_cfa_offset (VFP_STACK_REQ)
|
||||
+ mov r3, sp
|
||||
+ .inst 0xeca30b20 /* vstmia r3!, {d0-d15} */
|
||||
+ tst r5, #HWCAP_ARM_VFPD32
|
||||
+ beq 4f
|
||||
+ .inst 0xece30b20 /* vstmia r3!, {d16-d31} */
|
||||
+ /* Store the floating-point status register. */
|
||||
+4: .inst 0xeef12a10 /* vmrs r2, fpscr */
|
||||
+ str r2, [r3]
|
||||
+.Lno_vfp:
|
||||
bl __tls_get_addr
|
||||
rsb r0, r4, r0
|
||||
+#ifdef __SOFTFP__
|
||||
+ tst r5, #HWCAP_ARM_VFP
|
||||
+ beq 2f
|
||||
+#endif
|
||||
+ mov r3, sp
|
||||
+ .inst 0xecb30b20 /* vldmia r3!, {d0-d15} */
|
||||
+ tst r5, #HWCAP_ARM_VFPD32
|
||||
+ beq 5f
|
||||
+ .inst 0xecf30b20 /* vldmia r3!, {d16-d31} */
|
||||
+5: ldr r4, [r3] /* Reload saved FPSCR; r3 is past d0-d15 or d0-d31, matching the save path. */
|
||||
+ .inst 0xeee14a10 /* vmsr fpscr, r4 */
|
||||
+ add sp, sp, VFP_STACK_REQ
|
||||
+ cfi_adjust_cfa_offset (-VFP_STACK_REQ)
|
||||
+
|
||||
2:
|
||||
#if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \
|
||||
|| defined (ARM_ALWAYS_BX))
|
||||
- pop {r2,r3,r4, lr}
|
||||
- cfi_adjust_cfa_offset (-16)
|
||||
+ pop {r2,r3,r4,r5,ip, lr}
|
||||
+ cfi_adjust_cfa_offset (-24)
|
||||
cfi_restore (lr)
|
||||
+ cfi_restore (ip)
|
||||
+ cfi_restore (r5)
|
||||
cfi_restore (r4)
|
||||
cfi_restore (r3)
|
||||
cfi_restore (r2)
|
||||
bx lr
|
||||
#else
|
||||
- pop {r2,r3,r4, pc}
|
||||
+ pop {r2,r3,r4,r5,ip, pc}
|
||||
#endif
|
||||
eabi_fnend
|
||||
cfi_endproc
|
||||
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
|
||||
+
|
||||
+3: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
|
||||
+.Lrtld_global_ro:
|
||||
+ .long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
|
||||
#endif /* SHARED */
|
||||
diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h
|
||||
new file mode 100644
|
||||
index 0000000000000000..e413ac21fb9ed9bf
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/arm/tst-gnu2-tls2.h
|
||||
@@ -0,0 +1,128 @@
|
||||
+/* Test TLSDESC relocation. ARM version.
|
||||
+ Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
+ This file is part of the GNU C Library.
|
||||
+
|
||||
+ The GNU C Library is free software; you can redistribute it and/or
|
||||
+ modify it under the terms of the GNU Lesser General Public
|
||||
+ License as published by the Free Software Foundation; either
|
||||
+ version 2.1 of the License, or (at your option) any later version.
|
||||
+
|
||||
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+ Lesser General Public License for more details.
|
||||
+
|
||||
+ You should have received a copy of the GNU Lesser General Public
|
||||
+ License along with the GNU C Library; if not, see
|
||||
+ <https://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include <config.h>
|
||||
+#include <sys/auxv.h>
|
||||
+#include <string.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <endian.h>
|
||||
+
|
||||
+#ifndef __SOFTFP__
|
||||
+
|
||||
+# ifdef HAVE_ARM_PCS_VFP_D32
|
||||
+# define SAVE_VFP_D32 \
|
||||
+ asm volatile ("vldr d16,=17" : : : "d16"); \
|
||||
+ asm volatile ("vldr d17,=18" : : : "d17"); \
|
||||
+ asm volatile ("vldr d18,=19" : : : "d18"); \
|
||||
+ asm volatile ("vldr d19,=20" : : : "d19"); \
|
||||
+ asm volatile ("vldr d20,=21" : : : "d20"); \
|
||||
+ asm volatile ("vldr d21,=22" : : : "d21"); \
|
||||
+ asm volatile ("vldr d22,=23" : : : "d22"); \
|
||||
+ asm volatile ("vldr d23,=24" : : : "d23"); \
|
||||
+ asm volatile ("vldr d24,=25" : : : "d24"); \
|
||||
+ asm volatile ("vldr d25,=26" : : : "d25"); \
|
||||
+ asm volatile ("vldr d26,=27" : : : "d26"); \
|
||||
+ asm volatile ("vldr d27,=28" : : : "d27"); \
|
||||
+ asm volatile ("vldr d28,=29" : : : "d28"); \
|
||||
+ asm volatile ("vldr d29,=30" : : : "d29"); \
|
||||
+ asm volatile ("vldr d30,=31" : : : "d30"); \
|
||||
+ asm volatile ("vldr d31,=32" : : : "d31");
|
||||
+# else
|
||||
+# define SAVE_VFP_D32
|
||||
+# endif
|
||||
+
|
||||
+# define INIT_TLSDESC_CALL() \
|
||||
+ unsigned long hwcap = getauxval (AT_HWCAP)
|
||||
+
|
||||
+/* Set each vector register to a value from 1 to 32 before the TLS access,
|
||||
+ dump to memory after TLS access, and compare with the expected values. */
|
||||
+
|
||||
+# define BEFORE_TLSDESC_CALL() \
|
||||
+ if (hwcap & HWCAP_ARM_VFP) \
|
||||
+ { \
|
||||
+ asm volatile ("vldr d0,=1" : : : "d0"); \
|
||||
+ asm volatile ("vldr d1,=2" : : : "d1"); \
|
||||
+ asm volatile ("vldr d2,=3" : : : "d2"); \
|
||||
+ asm volatile ("vldr d3,=4" : : : "d3"); \
|
||||
+ asm volatile ("vldr d4,=5" : : : "d4"); \
|
||||
+ asm volatile ("vldr d5,=6" : : : "d5"); \
|
||||
+ asm volatile ("vldr d6,=7" : : : "d6"); \
|
||||
+ asm volatile ("vldr d7,=8" : : : "d7"); \
|
||||
+ asm volatile ("vldr d8,=9" : : : "d8"); \
|
||||
+ asm volatile ("vldr d9,=10" : : : "d9"); \
|
||||
+ asm volatile ("vldr d10,=11" : : : "d10"); \
|
||||
+ asm volatile ("vldr d11,=12" : : : "d11"); \
|
||||
+ asm volatile ("vldr d12,=13" : : : "d12"); \
|
||||
+ asm volatile ("vldr d13,=14" : : : "d13"); \
|
||||
+ asm volatile ("vldr d14,=15" : : : "d14"); \
|
||||
+ asm volatile ("vldr d15,=16" : : : "d15"); \
|
||||
+ } \
|
||||
+ if (hwcap & HWCAP_ARM_VFPD32) \
|
||||
+ { \
|
||||
+ SAVE_VFP_D32 \
|
||||
+ }
|
||||
+
|
||||
+# define VFP_STACK_REQ (16*8)
|
||||
+# if __BYTE_ORDER == __BIG_ENDIAN
|
||||
+# define DISP 7
|
||||
+# else
|
||||
+# define DISP 0
|
||||
+# endif
|
||||
+
|
||||
+# ifdef HAVE_ARM_PCS_VFP_D32
|
||||
+# define CHECK_VFP_D32 \
|
||||
+ char vfp[VFP_STACK_REQ]; \
|
||||
+ asm volatile ("vstmia %0, {d16-d31}\n" \
|
||||
+ : \
|
||||
+ : "r" (vfp) \
|
||||
+ : "memory"); \
|
||||
+ \
|
||||
+ char expected[VFP_STACK_REQ] = { 0 }; \
|
||||
+ for (int i = 0; i < 16; ++i) \
|
||||
+ expected[i * 8 + DISP] = i + 17; \
|
||||
+ \
|
||||
+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
|
||||
+ abort ();
|
||||
+# else
|
||||
+# define CHECK_VFP_D32
|
||||
+# endif
|
||||
+
|
||||
+# define AFTER_TLSDESC_CALL() \
|
||||
+ if (hwcap & HWCAP_ARM_VFP) \
|
||||
+ { \
|
||||
+ char vfp[VFP_STACK_REQ]; \
|
||||
+ asm volatile ("vstmia %0, {d0-d15}\n" \
|
||||
+ : \
|
||||
+ : "r" (vfp) \
|
||||
+ : "memory"); \
|
||||
+ \
|
||||
+ char expected[VFP_STACK_REQ] = { 0 }; \
|
||||
+ for (int i = 0; i < 16; ++i) \
|
||||
+ expected[i * 8 + DISP] = i + 1; \
|
||||
+ \
|
||||
+ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \
|
||||
+ abort (); \
|
||||
+ } \
|
||||
+ if (hwcap & HWCAP_ARM_VFPD32) \
|
||||
+ { \
|
||||
+ CHECK_VFP_D32 \
|
||||
+ }
|
||||
+
|
||||
+#endif /* __SOFTFP__ */
|
||||
+
|
||||
+#include_next <tst-gnu2-tls2.h>
|
221
glibc-upstream-2.39-17.patch
Normal file
221
glibc-upstream-2.39-17.patch
Normal file
@ -0,0 +1,221 @@
|
||||
commit aded2fc004e7ee85cf0b45b1382552d41e555a23
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Tue Mar 12 13:21:20 2024 -0300
|
||||
|
||||
elf: Enable TLS descriptor tests on aarch64
|
||||
|
||||
The aarch64 uses 'trad' for traditional tls and 'desc' for tls
|
||||
descriptors, but unlike other targets it defaults to 'desc'. The
|
||||
gnutls2 configure check does not set aarch64 as an ABI that uses
|
||||
TLS descriptors, which then disables some tests.
|
||||
|
||||
Also rename the internal machinery from gnu2 to tls descriptors.
|
||||
|
||||
Checked on aarch64-linux-gnu.
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
(cherry picked from commit 3d53d18fc71c5d9ef4773b8bce04d54b80181926)
|
||||
|
||||
diff --git a/configure b/configure
|
||||
index 117b48a421792eda..432e40a59295cffd 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -653,7 +653,7 @@ LIBGD
|
||||
libc_cv_cc_loop_to_function
|
||||
libc_cv_cc_submachine
|
||||
libc_cv_cc_nofma
|
||||
-libc_cv_mtls_dialect_gnu2
|
||||
+libc_cv_mtls_descriptor
|
||||
libc_cv_has_glob_dat
|
||||
libc_cv_fpie
|
||||
libc_cv_z_execstack
|
||||
@@ -4760,6 +4760,9 @@ libc_config_ok=no
|
||||
# whether to use such directories.
|
||||
with_fp_cond=1
|
||||
|
||||
+# A preconfigure script may define another name to TLS descriptor variant
|
||||
+mtls_descriptor=gnu2
|
||||
+
|
||||
if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null`
|
||||
then
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5
|
||||
@@ -7006,9 +7009,9 @@ fi
|
||||
printf "%s\n" "$libc_cv_has_glob_dat" >&6; }
|
||||
|
||||
|
||||
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5
|
||||
-printf %s "checking for -mtls-dialect=gnu2... " >&6; }
|
||||
-if test ${libc_cv_mtls_dialect_gnu2+y}
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5
|
||||
+printf %s "checking for tls descriptor support... " >&6; }
|
||||
+if test ${libc_cv_mtls_descriptor+y}
|
||||
then :
|
||||
printf %s "(cached) " >&6
|
||||
else $as_nop
|
||||
@@ -7019,7 +7022,7 @@ void foo (void)
|
||||
i = 10;
|
||||
}
|
||||
EOF
|
||||
-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
|
||||
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
|
||||
-shared conftest.c -o conftest 1>&5'
|
||||
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
(eval $ac_try) 2>&5
|
||||
@@ -7027,17 +7030,17 @@ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nost
|
||||
printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
test $ac_status = 0; }; }
|
||||
then
|
||||
- libc_cv_mtls_dialect_gnu2=yes
|
||||
+ libc_cv_mtls_descriptor=$mtls_descriptor
|
||||
else
|
||||
- libc_cv_mtls_dialect_gnu2=no
|
||||
+ libc_cv_mtls_descriptor=no
|
||||
fi
|
||||
rm -f conftest*
|
||||
fi
|
||||
-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5
|
||||
-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; }
|
||||
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5
|
||||
+printf "%s\n" "$libc_cv_mtls_descriptor" >&6; }
|
||||
|
||||
config_vars="$config_vars
|
||||
-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2"
|
||||
+have-mtls-descriptor = $libc_cv_mtls_descriptor"
|
||||
|
||||
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5
|
||||
printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; }
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index 19b88a47a52508a1..bdc385d03c3dc7f5 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -442,6 +442,9 @@ libc_config_ok=no
|
||||
# whether to use such directories.
|
||||
with_fp_cond=1
|
||||
|
||||
+# A preconfigure script may define another name to TLS descriptor variant
|
||||
+mtls_descriptor=gnu2
|
||||
+
|
||||
dnl Let sysdeps/*/preconfigure act here.
|
||||
LIBC_PRECONFIGURE([$srcdir], [for sysdeps])
|
||||
|
||||
@@ -1287,7 +1290,7 @@ fi
|
||||
rm -f conftest*])
|
||||
AC_SUBST(libc_cv_has_glob_dat)
|
||||
|
||||
-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2,
|
||||
+AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor,
|
||||
[dnl
|
||||
cat > conftest.c <<EOF
|
||||
__thread int i;
|
||||
@@ -1296,16 +1299,16 @@ void foo (void)
|
||||
i = 10;
|
||||
}
|
||||
EOF
|
||||
-if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles
|
||||
+if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles
|
||||
-shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD])
|
||||
then
|
||||
- libc_cv_mtls_dialect_gnu2=yes
|
||||
+ libc_cv_mtls_descriptor=$mtls_descriptor
|
||||
else
|
||||
- libc_cv_mtls_dialect_gnu2=no
|
||||
+ libc_cv_mtls_descriptor=no
|
||||
fi
|
||||
rm -f conftest*])
|
||||
-AC_SUBST(libc_cv_mtls_dialect_gnu2)
|
||||
-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2])
|
||||
+AC_SUBST(libc_cv_mtls_descriptor)
|
||||
+LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor])
|
||||
|
||||
dnl clang emits an warning for a double alias redirection, to warn the
|
||||
dnl original symbol is sed even when weak definition overrides it.
|
||||
diff --git a/elf/Makefile b/elf/Makefile
|
||||
index 030db4d207d3491e..69aa423c4b90127d 100644
|
||||
--- a/elf/Makefile
|
||||
+++ b/elf/Makefile
|
||||
@@ -999,13 +999,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\
|
||||
# For +depfiles in Makerules.
|
||||
extra-test-objs += tst-auditmod17.os
|
||||
|
||||
-ifeq (yes,$(have-mtls-dialect-gnu2))
|
||||
+ifneq (no,$(have-mtls-descriptor))
|
||||
tests += tst-gnu2-tls1
|
||||
modules-names += tst-gnu2-tls1mod
|
||||
$(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
|
||||
tst-gnu2-tls1mod.so-no-z-defs = yes
|
||||
-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
|
||||
-endif # $(have-mtls-dialect-gnu2)
|
||||
+CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+endif # $(have-mtls-descriptor)
|
||||
|
||||
ifeq (yes,$(have-protected-data))
|
||||
modules-names += tst-protected1moda tst-protected1modb
|
||||
@@ -2972,11 +2972,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \
|
||||
$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
|
||||
$(objpfx)tst-audit-tlsdesc-mod2.so \
|
||||
$(shared-thread-library)
|
||||
-ifeq (yes,$(have-mtls-dialect-gnu2))
|
||||
+ifneq (no,$(have-mtls-descriptor))
|
||||
# The test is valid for all TLS types, but we want to exercise GNU2
|
||||
# TLS if possible.
|
||||
-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
|
||||
+CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
endif
|
||||
$(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
|
||||
$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
|
||||
@@ -3055,11 +3055,11 @@ $(objpfx)tst-gnu2-tls2.out: \
|
||||
$(objpfx)tst-gnu2-tls2mod1.so \
|
||||
$(objpfx)tst-gnu2-tls2mod2.so
|
||||
|
||||
-ifeq (yes,$(have-mtls-dialect-gnu2))
|
||||
-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2
|
||||
+ifneq (no,$(have-mtls-descriptor))
|
||||
+CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
endif
|
||||
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
|
||||
index d9bd1f8558a079cb..19657b627bc84c4e 100644
|
||||
--- a/sysdeps/aarch64/preconfigure
|
||||
+++ b/sysdeps/aarch64/preconfigure
|
||||
@@ -2,5 +2,6 @@ case "$machine" in
|
||||
aarch64*)
|
||||
base_machine=aarch64
|
||||
machine=aarch64
|
||||
+ mtls_descriptor=desc
|
||||
;;
|
||||
esac
|
||||
diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile
|
||||
index d5cea717a9c201aa..619474eca94fe8e4 100644
|
||||
--- a/sysdeps/arm/Makefile
|
||||
+++ b/sysdeps/arm/Makefile
|
||||
@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os
|
||||
lib-noranlib: $(objpfx)libgcc-stubs.a
|
||||
|
||||
ifeq ($(build-shared),yes)
|
||||
-ifeq (yes,$(have-mtls-dialect-gnu2))
|
||||
+ifneq (no,$(have-mtls-descriptor))
|
||||
tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy
|
||||
modules-names += tst-armtlsdesclocmod
|
||||
modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod
|
||||
CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic=
|
||||
CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic=
|
||||
-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2
|
||||
-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2
|
||||
+CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
+CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor)
|
||||
LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now
|
||||
tst-armtlsdescloc-ENV = LD_BIND_NOW=1
|
||||
tst-armtlsdescextnow-ENV = LD_BIND_NOW=1
|
24
glibc-upstream-2.39-18.patch
Normal file
24
glibc-upstream-2.39-18.patch
Normal file
@ -0,0 +1,24 @@
|
||||
commit 5a461f2949ded98d8211939f84988bc464c7b4fe
|
||||
Author: Andreas Schwab <schwab@suse.de>
|
||||
Date: Tue Mar 19 13:49:50 2024 +0100
|
||||
|
||||
Add tst-gnu2-tls2mod1 to test-internal-extras
|
||||
|
||||
That allows sysdeps/x86_64/tst-gnu2-tls2mod1.S to use internal headers.
|
||||
|
||||
Fixes: 717ebfa85c ("x86-64: Allocate state buffer space for RDI, RSI and RBX")
|
||||
(cherry picked from commit fd7ee2e6c5eb49e4a630a9978b4d668bff6354ee)
|
||||
|
||||
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
|
||||
index e8babc9a4edbf90b..9d374a329916fc45 100644
|
||||
--- a/sysdeps/x86_64/Makefile
|
||||
+++ b/sysdeps/x86_64/Makefile
|
||||
@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2
|
||||
$(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so
|
||||
endif
|
||||
|
||||
+test-internal-extras += tst-gnu2-tls2mod1
|
||||
+
|
||||
endif # $(subdir) == elf
|
||||
|
||||
ifeq ($(subdir),csu)
|
146
glibc-upstream-2.39-19.patch
Normal file
146
glibc-upstream-2.39-19.patch
Normal file
@ -0,0 +1,146 @@
|
||||
commit aa4249266e9906c4bc833e4847f4d8feef59504f
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Thu Feb 8 10:08:38 2024 -0300
|
||||
|
||||
x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
|
||||
|
||||
The REP MOVSB usage on memcpy/memmove does not show much performance
|
||||
improvement on Zen3/Zen4 cores compared to the vectorized loops. Also,
|
||||
as from BZ 30994, if the source is aligned and the destination is not
|
||||
the performance can be 20x slower.
|
||||
|
||||
The performance difference is noticeable with small buffer sizes, closer
|
||||
to the lower bounds limits when memcpy/memmove starts to use ERMS. The
|
||||
performance of REP MOVSB is similar to vectorized instruction on the
|
||||
size limit (the L2 cache). Also, there is no drawback to multiple cores
|
||||
sharing the cache.
|
||||
|
||||
Checked on x86_64-linux-gnu on Zen3.
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
(cherry picked from commit 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e)
|
||||
|
||||
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
|
||||
index d5101615e348e5c2..f34d12846caf9422 100644
|
||||
--- a/sysdeps/x86/dl-cacheinfo.h
|
||||
+++ b/sysdeps/x86/dl-cacheinfo.h
|
||||
@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
long int data = -1;
|
||||
long int shared = -1;
|
||||
long int shared_per_thread = -1;
|
||||
- long int core = -1;
|
||||
unsigned int threads = 0;
|
||||
unsigned long int level1_icache_size = -1;
|
||||
unsigned long int level1_icache_linesize = -1;
|
||||
@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
if (cpu_features->basic.kind == arch_kind_intel)
|
||||
{
|
||||
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
|
||||
- core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
|
||||
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
|
||||
shared_per_thread = shared;
|
||||
|
||||
@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
= handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
|
||||
level1_dcache_linesize
|
||||
= handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
|
||||
- level2_cache_size = core;
|
||||
+ level2_cache_size
|
||||
+ = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
|
||||
level2_cache_assoc
|
||||
= handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
|
||||
level2_cache_linesize
|
||||
@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
level4_cache_size
|
||||
= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
|
||||
|
||||
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
|
||||
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
|
||||
+ level2_cache_size);
|
||||
}
|
||||
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
|
||||
{
|
||||
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
|
||||
- core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
|
||||
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
|
||||
shared_per_thread = shared;
|
||||
|
||||
@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
level1_dcache_size = data;
|
||||
level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
|
||||
level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
|
||||
- level2_cache_size = core;
|
||||
+ level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
|
||||
level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
|
||||
level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
|
||||
level3_cache_size = shared;
|
||||
level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
|
||||
level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
|
||||
|
||||
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
|
||||
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
|
||||
+ level2_cache_size);
|
||||
}
|
||||
else if (cpu_features->basic.kind == arch_kind_amd)
|
||||
{
|
||||
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
|
||||
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
|
||||
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
|
||||
|
||||
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
|
||||
@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
level1_dcache_size = data;
|
||||
level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
|
||||
level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
|
||||
- level2_cache_size = core;
|
||||
+ level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);;
|
||||
level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
|
||||
level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
|
||||
level3_cache_size = shared;
|
||||
@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
if (shared <= 0)
|
||||
{
|
||||
/* No shared L3 cache. All we have is the L2 cache. */
|
||||
- shared = core;
|
||||
+ shared = level2_cache_size;
|
||||
}
|
||||
else if (cpu_features->basic.family < 0x17)
|
||||
{
|
||||
/* Account for exclusive L2 and L3 caches. */
|
||||
- shared += core;
|
||||
+ shared += level2_cache_size;
|
||||
}
|
||||
|
||||
shared_per_thread = shared;
|
||||
@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
|
||||
rep_movsb_threshold = 2112;
|
||||
|
||||
+ /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
|
||||
+ cases slower than the vectorized path (and for some alignments,
|
||||
+ it is really slow, check BZ #30994). */
|
||||
+ if (cpu_features->basic.kind == arch_kind_amd)
|
||||
+ rep_movsb_threshold = non_temporal_threshold;
|
||||
+
|
||||
/* The default threshold to use Enhanced REP STOSB. */
|
||||
unsigned long int rep_stosb_threshold = 2048;
|
||||
|
||||
@@ -1028,16 +1033,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
SIZE_MAX);
|
||||
|
||||
unsigned long int rep_movsb_stop_threshold;
|
||||
- /* ERMS feature is implemented from AMD Zen3 architecture and it is
|
||||
- performing poorly for data above L2 cache size. Henceforth, adding
|
||||
- an upper bound threshold parameter to limit the usage of Enhanced
|
||||
- REP MOVSB operations and setting its value to L2 cache size. */
|
||||
- if (cpu_features->basic.kind == arch_kind_amd)
|
||||
- rep_movsb_stop_threshold = core;
|
||||
/* Setting the upper bound of ERMS to the computed value of
|
||||
- non-temporal threshold for architectures other than AMD. */
|
||||
- else
|
||||
- rep_movsb_stop_threshold = non_temporal_threshold;
|
||||
+ non-temporal threshold for all architectures. */
|
||||
+ rep_movsb_stop_threshold = non_temporal_threshold;
|
||||
|
||||
cpu_features->data_cache_size = data;
|
||||
cpu_features->shared_cache_size = shared;
|
30
glibc-upstream-2.39-20.patch
Normal file
30
glibc-upstream-2.39-20.patch
Normal file
@ -0,0 +1,30 @@
|
||||
commit 6484a92698039c4a7a510f0214e22d067b0d78b3
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Thu Feb 8 10:08:39 2024 -0300
|
||||
|
||||
x86: Do not prefer ERMS for memset on Zen3+
|
||||
|
||||
For AMD Zen3+ architecture, the performance of the vectorized loop is
|
||||
slightly better than ERMS.
|
||||
|
||||
Checked on x86_64-linux-gnu on Zen3.
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
|
||||
(cherry picked from commit 272708884cb750f12f5c74a00e6620c19dc6d567)
|
||||
|
||||
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
|
||||
index f34d12846caf9422..5a98f70364220da4 100644
|
||||
--- a/sysdeps/x86/dl-cacheinfo.h
|
||||
+++ b/sysdeps/x86/dl-cacheinfo.h
|
||||
@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||||
minimum value is fixed. */
|
||||
rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
|
||||
long int, NULL);
|
||||
+ if (cpu_features->basic.kind == arch_kind_amd
|
||||
+ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
|
||||
+ /* For AMD Zen3+ architecture, the performance of the vectorized loop is
|
||||
+ slightly better than ERMS. */
|
||||
+ rep_stosb_threshold = SIZE_MAX;
|
||||
|
||||
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
|
||||
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
|
24
glibc-upstream-2.39-21.patch
Normal file
24
glibc-upstream-2.39-21.patch
Normal file
@ -0,0 +1,24 @@
|
||||
commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae
|
||||
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||
Date: Thu Feb 8 10:08:40 2024 -0300
|
||||
|
||||
x86: Expand the comment on when REP STOSB is used on memset
|
||||
|
||||
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
||||
(cherry picked from commit 491e55beab7457ed310a4a47496f4a333c5d1032)
|
||||
|
||||
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
index 9984c3ca0fafab6a..97839a22483b0613 100644
|
||||
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
@@ -21,7 +21,9 @@
|
||||
2. If size is less than VEC, use integer register stores.
|
||||
3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
|
||||
4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
|
||||
- 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
|
||||
+ 5. On machines with the ERMS feature, if size is greater than or equal to
|
||||
+ __x86_rep_stosb_threshold then REP STOSB will be used.
|
||||
+ 6. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
|
||||
4 VEC stores and store 4 * VEC at a time until done. */
|
||||
|
||||
#include <sysdep.h>
|
284
glibc-upstream-2.39-8.patch
Normal file
284
glibc-upstream-2.39-8.patch
Normal file
@ -0,0 +1,284 @@
|
||||
commit ee7f4c54e19738c2c27d3846e1e9b3595c89221f
|
||||
Author: Manjunath Matti <mmatti@linux.ibm.com>
|
||||
Date: Tue Mar 19 15:29:48 2024 -0500
|
||||
|
||||
powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
|
||||
|
||||
This patch adds a new feature for powerpc. In order to get faster
|
||||
access to the HWCAP3/HWCAP4 masks, similar to HWCAP/HWCAP2 (i.e. for
|
||||
implementing __builtin_cpu_supports() in GCC) without the overhead of
|
||||
reading them from the auxiliary vector, we now reserve space for them
|
||||
in the TCB.
|
||||
|
||||
Suggested-by: Peter Bergner <bergner@linux.ibm.com>
|
||||
Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
|
||||
(cherry picked from commit 3ab9b88e2ac91062b6d493fe32bd101a55006c6a)
|
||||
|
||||
diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
|
||||
index 7345ebc4e586883f..aaf67b87e81b04c8 100644
|
||||
--- a/elf/dl-diagnostics.c
|
||||
+++ b/elf/dl-diagnostics.c
|
||||
@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ)
|
||||
_dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
|
||||
_dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
|
||||
_dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
|
||||
+ _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3));
|
||||
+ _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4));
|
||||
_dl_diagnostics_print_labeled_string
|
||||
("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
|
||||
_dl_diagnostics_print_labeled_value
|
||||
diff --git a/elf/dl-support.c b/elf/dl-support.c
|
||||
index 2f502c8b0d27b784..451932dd03e971b8 100644
|
||||
--- a/elf/dl-support.c
|
||||
+++ b/elf/dl-support.c
|
||||
@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr;
|
||||
size_t _dl_phnum;
|
||||
uint64_t _dl_hwcap;
|
||||
uint64_t _dl_hwcap2;
|
||||
+uint64_t _dl_hwcap3;
|
||||
+uint64_t _dl_hwcap4;
|
||||
|
||||
enum dso_sort_algorithm _dl_dso_sort_algo;
|
||||
|
||||
diff --git a/elf/elf.h b/elf/elf.h
|
||||
index 455731663c6ed339..1c394c64cd5c66ed 100644
|
||||
--- a/elf/elf.h
|
||||
+++ b/elf/elf.h
|
||||
@@ -1234,6 +1234,10 @@ typedef struct
|
||||
#define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */
|
||||
#define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */
|
||||
|
||||
+/* More machine-dependent hints about processor capabilities. */
|
||||
+#define AT_HWCAP3 29 /* extension of AT_HWCAP. */
|
||||
+#define AT_HWCAP4 30 /* extension of AT_HWCAP. */
|
||||
+
|
||||
#define AT_EXECFN 31 /* Filename of executable. */
|
||||
|
||||
/* Pointer to the global system page used for system calls and other
|
||||
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
|
||||
index 117c901ccc5c5f0b..50f58a60e3f02330 100644
|
||||
--- a/sysdeps/generic/ldsodefs.h
|
||||
+++ b/sysdeps/generic/ldsodefs.h
|
||||
@@ -646,6 +646,8 @@ struct rtld_global_ro
|
||||
/* Mask for more hardware capabilities that are available on some
|
||||
platforms. */
|
||||
EXTERN uint64_t _dl_hwcap2;
|
||||
+ EXTERN uint64_t _dl_hwcap3;
|
||||
+ EXTERN uint64_t _dl_hwcap4;
|
||||
|
||||
EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
|
||||
|
||||
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
|
||||
index a76bb6e5b0895e3f..8cf00aa7e359bb6a 100644
|
||||
--- a/sysdeps/powerpc/dl-procinfo.c
|
||||
+++ b/sysdeps/powerpc/dl-procinfo.c
|
||||
@@ -38,6 +38,10 @@
|
||||
needed.
|
||||
*/
|
||||
|
||||
+/* The total number of available bits (including those prior to
|
||||
+ _DL_HWCAP_FIRST). Some of these bits might not be used. */
|
||||
+#define _DL_HWCAP_COUNT 128
|
||||
+
|
||||
#ifndef PROCINFO_CLASS
|
||||
# define PROCINFO_CLASS
|
||||
#endif
|
||||
@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features
|
||||
#if !defined PROCINFO_DECL && defined SHARED
|
||||
._dl_powerpc_cap_flags
|
||||
#else
|
||||
-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
|
||||
+PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15]
|
||||
#endif
|
||||
#ifndef PROCINFO_DECL
|
||||
= {
|
||||
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
|
||||
index 68f424109501aaef..f8cb343877386402 100644
|
||||
--- a/sysdeps/powerpc/dl-procinfo.h
|
||||
+++ b/sysdeps/powerpc/dl-procinfo.h
|
||||
@@ -22,16 +22,17 @@
|
||||
#include <ldsodefs.h>
|
||||
#include <sysdep.h> /* This defines the PPC_FEATURE[2]_* macros. */
|
||||
|
||||
-/* The total number of available bits (including those prior to
|
||||
- _DL_HWCAP_FIRST). Some of these bits might not be used. */
|
||||
-#define _DL_HWCAP_COUNT 64
|
||||
+/* Feature masks are all 32-bits in size. */
|
||||
+#define _DL_HWCAP_SIZE 32
|
||||
|
||||
-/* Features started at bit 31 and decremented as new features were added. */
|
||||
-#define _DL_HWCAP_LAST 31
|
||||
+/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */
|
||||
+#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE
|
||||
|
||||
-/* AT_HWCAP2 features started at bit 31 and decremented as new features were
|
||||
- added. HWCAP2 feature bits start at bit 0. */
|
||||
-#define _DL_HWCAP2_LAST 31
|
||||
+/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */
|
||||
+#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE)
|
||||
+
|
||||
+/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. */
|
||||
+#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE)
|
||||
|
||||
/* These bits influence library search. */
|
||||
#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
|
||||
@@ -187,21 +188,42 @@ _dl_procinfo (unsigned int type, unsigned long int word)
|
||||
case AT_HWCAP:
|
||||
_dl_printf ("AT_HWCAP: ");
|
||||
|
||||
- for (int i = 0; i <= _DL_HWCAP_LAST; ++i)
|
||||
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
|
||||
if (word & (1 << i))
|
||||
_dl_printf (" %s", _dl_hwcap_string (i));
|
||||
break;
|
||||
case AT_HWCAP2:
|
||||
{
|
||||
- unsigned int offset = _DL_HWCAP_LAST + 1;
|
||||
|
||||
_dl_printf ("AT_HWCAP2: ");
|
||||
|
||||
- /* We have to go through them all because the kernel added the
|
||||
- AT_HWCAP2 features starting with the high bits. */
|
||||
- for (int i = 0; i <= _DL_HWCAP2_LAST; ++i)
|
||||
- if (word & (1 << i))
|
||||
- _dl_printf (" %s", _dl_hwcap_string (offset + i));
|
||||
+ /* We have to go through them all because the kernel added the
|
||||
+ AT_HWCAP2 features starting with the high bits. */
|
||||
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
|
||||
+ if (word & (1 << i))
|
||||
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i));
|
||||
+ break;
|
||||
+ }
|
||||
+ case AT_HWCAP3:
|
||||
+ {
|
||||
+ _dl_printf ("AT_HWCAP3: ");
|
||||
+
|
||||
+ /* We have to go through them all because the kernel added the
|
||||
+ AT_HWCAP3 features starting with the high bits. */
|
||||
+ for (int i = 0; i < _DL_HWCAP_SIZE; ++i)
|
||||
+ if (word & (1 << i))
|
||||
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i));
|
||||
+ break;
|
||||
+ }
|
||||
+ case AT_HWCAP4:
|
||||
+ {
|
||||
+ _dl_printf ("AT_HWCAP4: ");
|
||||
+
|
||||
+ /* We have to go through them all because the kernel added the
|
||||
+ AT_HWCAP4 features starting with the high bits. */
|
||||
+ for (int i = 0; i <= _DL_HWCAP_SIZE; ++i)
|
||||
+ if (word & (1 << i))
|
||||
+ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i));
|
||||
break;
|
||||
}
|
||||
case AT_L1I_CACHEGEOMETRY:
|
||||
diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c
|
||||
index 76344f285a903858..f6fede15a7dfbf6c 100644
|
||||
--- a/sysdeps/powerpc/hwcapinfo.c
|
||||
+++ b/sysdeps/powerpc/hwcapinfo.c
|
||||
@@ -31,7 +31,7 @@ void
|
||||
__tcb_parse_hwcap_and_convert_at_platform (void)
|
||||
{
|
||||
|
||||
- uint64_t h1, h2;
|
||||
+ uint64_t h1, h2, h3, h4;
|
||||
|
||||
/* Read AT_PLATFORM string from auxv and convert it to a number. */
|
||||
__tcb.at_platform = _dl_string_platform (GLRO (dl_platform));
|
||||
@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
|
||||
/* Read HWCAP and HWCAP2 from auxv. */
|
||||
h1 = GLRO (dl_hwcap);
|
||||
h2 = GLRO (dl_hwcap2);
|
||||
+ h3 = GLRO (dl_hwcap3);
|
||||
+ h4 = GLRO (dl_hwcap4);
|
||||
|
||||
/* hwcap contains only the latest supported ISA, the code checks which is
|
||||
and fills the previous supported ones. */
|
||||
@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void)
|
||||
else if (h1 & PPC_FEATURE_POWER5)
|
||||
h1 |= PPC_FEATURE_POWER4;
|
||||
|
||||
- uint64_t array_hwcaps[] = { h1, h2 };
|
||||
+ uint64_t array_hwcaps[] = { h1, h2, h3, h4 };
|
||||
init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps);
|
||||
|
||||
/* Consolidate both HWCAP and HWCAP2 into a single doubleword so that
|
||||
we can read both in a single load later. */
|
||||
__tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff);
|
||||
- __tcb.hwcap_extn = 0x0;
|
||||
+
|
||||
+ /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that
|
||||
+ we can read both in a single load later. */
|
||||
+ __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff);
|
||||
|
||||
}
|
||||
#if IS_IN (rtld)
|
||||
diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
|
||||
index e3d758b163c619df..ea2a58ecb1668774 100644
|
||||
--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h
|
||||
+++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h
|
||||
@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
|
||||
GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
|
||||
GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
|
||||
GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
|
||||
+ GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3];
|
||||
+ GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4];
|
||||
GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
|
||||
GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
|
||||
_dl_random = (void *) auxv_values[AT_RANDOM];
|
||||
diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c
|
||||
index ad3692d73839d7a3..e1b14e9eb34ff5cb 100644
|
||||
--- a/sysdeps/unix/sysv/linux/dl-sysdep.c
|
||||
+++ b/sysdeps/unix/sysv/linux/dl-sysdep.c
|
||||
@@ -197,6 +197,8 @@ _dl_show_auxv (void)
|
||||
[AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex },
|
||||
[AT_RANDOM - 2] = { "RANDOM: 0x", hex },
|
||||
[AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex },
|
||||
+ [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex },
|
||||
+ [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex },
|
||||
[AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec },
|
||||
[AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec },
|
||||
[AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex },
|
||||
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
|
||||
index 8e8a5ec2eab7e8c6..a947d62db63965b1 100644
|
||||
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
|
||||
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c
|
||||
@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[])
|
||||
which are set by __tcb_parse_hwcap_and_convert_at_platform. */
|
||||
cpu_features->hwcap = hwcaps[0];
|
||||
cpu_features->hwcap2 = hwcaps[1];
|
||||
+ cpu_features->hwcap3 = hwcaps[2];
|
||||
+ cpu_features->hwcap4 = hwcaps[3];
|
||||
/* Default is to use aligned memory access on optimized function unless
|
||||
tunables is enable, since for this case user can explicit disable
|
||||
unaligned optimizations. */
|
||||
diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
|
||||
index 1294f0b601ebf54f..e9eb6a13c8ab11d7 100644
|
||||
--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
|
||||
+++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h
|
||||
@@ -26,6 +26,8 @@ struct cpu_features
|
||||
bool use_cached_memopt;
|
||||
unsigned long int hwcap;
|
||||
unsigned long int hwcap2;
|
||||
+ unsigned long int hwcap3;
|
||||
+ unsigned long int hwcap4;
|
||||
};
|
||||
|
||||
static const char hwcap_names[] = {
|
||||
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
|
||||
index a4705daf1cdea4de..6a00cd88cd64b992 100644
|
||||
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
|
||||
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
|
||||
@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv,
|
||||
case AT_HWCAP2:
|
||||
_dl_hwcap2 = (unsigned long int) av->a_un.a_val;
|
||||
break;
|
||||
+ case AT_HWCAP3:
|
||||
+ _dl_hwcap3 = (unsigned long int) av->a_un.a_val;
|
||||
+ break;
|
||||
+ case AT_HWCAP4:
|
||||
+ _dl_hwcap4 = (unsigned long int) av->a_un.a_val;
|
||||
+ break;
|
||||
case AT_PLATFORM:
|
||||
_dl_platform = (void *) av->a_un.a_val;
|
||||
break;
|
172
glibc-upstream-2.39-9.patch
Normal file
172
glibc-upstream-2.39-9.patch
Normal file
@ -0,0 +1,172 @@
|
||||
commit aad45c8ac30aa1072e54903ce6aead22702f244a
|
||||
Author: Amrita H S <amritahs@linux.ibm.com>
|
||||
Date: Tue Mar 19 19:08:47 2024 -0500
|
||||
|
||||
powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
|
||||
|
||||
The following three changes have been added to provide initial Power11 support.
|
||||
1. Add the directories to hold Power11 files.
|
||||
2. Add support to select Power11 libraries based on AT_PLATFORM.
|
||||
3. Let submachine=power11 be set automatically.
|
||||
|
||||
Reviewed-by: Florian Weimer <fweimer@redhat.com>
|
||||
Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
|
||||
(cherry picked from commit 1ea051145612f199d8716ecdf78b084b00b5a727)
|
||||
|
||||
diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h
|
||||
index f8cb343877386402..b36697ba440654be 100644
|
||||
--- a/sysdeps/powerpc/dl-procinfo.h
|
||||
+++ b/sysdeps/powerpc/dl-procinfo.h
|
||||
@@ -38,7 +38,7 @@
|
||||
#define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \
|
||||
+ PPC_FEATURE_HAS_DFP)
|
||||
|
||||
-#define _DL_PLATFORMS_COUNT 16
|
||||
+#define _DL_PLATFORMS_COUNT 17
|
||||
|
||||
#define _DL_FIRST_PLATFORM 32
|
||||
/* Mask to filter out platforms. */
|
||||
@@ -62,6 +62,7 @@
|
||||
#define PPC_PLATFORM_POWER8 13
|
||||
#define PPC_PLATFORM_POWER9 14
|
||||
#define PPC_PLATFORM_POWER10 15
|
||||
+#define PPC_PLATFORM_POWER11 16
|
||||
|
||||
static inline const char *
|
||||
__attribute__ ((unused))
|
||||
@@ -89,6 +90,11 @@ _dl_string_platform (const char *str)
|
||||
ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10;
|
||||
str++;
|
||||
}
|
||||
+ else if (str[1] == '1')
|
||||
+ {
|
||||
+ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11;
|
||||
+ str++;
|
||||
+ }
|
||||
else
|
||||
return -1;
|
||||
break;
|
||||
diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..051cbe0f7911c93c
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc32/power11/Implies
|
||||
@@ -0,0 +1,2 @@
|
||||
+powerpc/powerpc32/power10/fpu
|
||||
+powerpc/powerpc32/power10
|
||||
diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..58edb2861d17f504
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc32/power10/fpu/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..c70f0428badbaf14
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc32/power10/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..de481d1c13db695e
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/be/power11/Implies
|
||||
@@ -0,0 +1,2 @@
|
||||
+powerpc/powerpc64/be/power10/fpu
|
||||
+powerpc/powerpc64/be/power10
|
||||
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..dff0e13064ce8238
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/be/power10/fpu
|
||||
diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..c3f259e0097386a5
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/be/power10/fpu/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..9491a394c9519d01
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/be/power10/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..e18182dcc1f4c25f
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/le/power11/Implies
|
||||
@@ -0,0 +1,2 @@
|
||||
+powerpc/powerpc64/le/power10/fpu
|
||||
+powerpc/powerpc64/le/power10
|
||||
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..e41bd55684dbb5b8
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/le/power10/fpu
|
||||
diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..c838d5093140eae3
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/le/power10/fpu/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
|
||||
new file mode 100644
|
||||
index 0000000000000000..687248c3c267cd8c
|
||||
--- /dev/null
|
||||
+++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies
|
||||
@@ -0,0 +1 @@
|
||||
+powerpc/powerpc64/le/power10/multiarch
|
||||
diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
|
||||
index 77465d9133410267..65d3e69303a1c963 100644
|
||||
--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
|
||||
+++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
|
||||
@@ -36,9 +36,11 @@ compute_level (void)
|
||||
return 9;
|
||||
if (strcmp (platform, "power10") == 0)
|
||||
return 10;
|
||||
+ if (strcmp (platform, "power11") == 0)
|
||||
+ return 11;
|
||||
printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
|
||||
- /* Assume that the new platform supports POWER10. */
|
||||
- return 10;
|
||||
+ /* Assume that the new platform supports POWER11. */
|
||||
+ return 11;
|
||||
}
|
||||
|
||||
static int
|
||||
diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure
|
||||
index 4de94089a3f68532..9e5a07ab6d6767cd 100644
|
||||
--- a/sysdeps/powerpc/preconfigure
|
||||
+++ b/sysdeps/powerpc/preconfigure
|
||||
@@ -58,7 +58,7 @@ fi
|
||||
|
||||
;;
|
||||
|
||||
- a2|970|power[4-9]|power5x|power6+|power10)
|
||||
+ a2|970|power[4-9]|power5x|power6+|power10|power11)
|
||||
submachine=${archcpu}
|
||||
if test ${libc_cv_cc_submachine+y}
|
||||
then :
|
||||
diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac
|
||||
index 6c63bd8257b7e40a..14b6dafd4a895c3b 100644
|
||||
--- a/sysdeps/powerpc/preconfigure.ac
|
||||
+++ b/sysdeps/powerpc/preconfigure.ac
|
||||
@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in
|
||||
AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
|
||||
;;
|
||||
|
||||
- a2|970|power[[4-9]]|power5x|power6+|power10)
|
||||
+ a2|970|power[[4-9]]|power5x|power6+|power10|power11)
|
||||
submachine=${archcpu}
|
||||
AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="")
|
||||
;;
|
34
glibc.spec
34
glibc.spec
@ -171,7 +171,7 @@ Version: %{glibcversion}
|
||||
# - It allows using the Release number without the %%dist tag in the dependency
|
||||
# generator to make the generated requires interchangeable between Rawhide
|
||||
# and ELN (.elnYY < .fcXX).
|
||||
%global baserelease 6
|
||||
%global baserelease 7
|
||||
Release: %{baserelease}%{?dist}
|
||||
|
||||
# Licenses:
|
||||
@ -288,6 +288,20 @@ Patch27: glibc-upstream-2.39-4.patch
|
||||
Patch28: glibc-upstream-2.39-5.patch
|
||||
Patch29: glibc-upstream-2.39-6.patch
|
||||
Patch30: glibc-upstream-2.39-7.patch
|
||||
Patch31: glibc-upstream-2.39-8.patch
|
||||
Patch32: glibc-upstream-2.39-9.patch
|
||||
Patch33: glibc-upstream-2.39-10.patch
|
||||
Patch34: glibc-upstream-2.39-11.patch
|
||||
Patch35: glibc-upstream-2.39-12.patch
|
||||
Patch36: glibc-upstream-2.39-13.patch
|
||||
Patch37: glibc-upstream-2.39-14.patch
|
||||
Patch38: glibc-upstream-2.39-15.patch
|
||||
Patch39: glibc-upstream-2.39-16.patch
|
||||
Patch40: glibc-upstream-2.39-17.patch
|
||||
Patch41: glibc-upstream-2.39-18.patch
|
||||
Patch42: glibc-upstream-2.39-19.patch
|
||||
Patch43: glibc-upstream-2.39-20.patch
|
||||
Patch44: glibc-upstream-2.39-21.patch
|
||||
|
||||
##############################################################################
|
||||
# Continued list of core "glibc" package information:
|
||||
@ -2464,6 +2478,24 @@ update_gconv_modules_cache ()
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Thu Apr 04 2024 Arjun Shankar <arjun@redhat.com> - 2.39-7
|
||||
- Sync with upstream branch release/2.39/master,
|
||||
commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae:
|
||||
- x86: Expand the comment on when REP STOSB is used on memset
|
||||
- x86: Do not prefer ERMS for memset on Zen3+
|
||||
- x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
|
||||
- Add tst-gnu2-tls2mod1 to test-internal-extras
|
||||
- elf: Enable TLS descriptor tests on aarch64
|
||||
- arm: Update _dl_tlsdesc_dynamic to preserve caller-saved registers (BZ 31372)
|
||||
- Ignore undefined symbols for -mtls-dialect=gnu2
|
||||
- x86-64: Allocate state buffer space for RDI, RSI and RBX
|
||||
- x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
|
||||
- x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers
|
||||
- x86-64: Save APX registers in ld.so trampoline
|
||||
- LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
|
||||
- powerpc: Placeholder and infrastructure/build support to add Power11 related changes.
|
||||
- powerpc: Add HWCAP3/HWCAP4 data to TCB for Power Architecture.
|
||||
|
||||
* Tue Mar 26 2024 Florian Weimer <fweimer@redhat.com> - 2.39-6
|
||||
- Do not generate ELF dependency information for glibc32
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user