Compare commits


2 Commits

Author      SHA1        Message                                                    Date
DJ Delorie  a7536148af  x86-64: Save APX registers in ld.so trampoline             2024-04-09 01:53:29 +00:00
DJ Delorie  6dbf26d6f4  x86: Fix Zen3/Zen4 ERMS selection (Resolves: RHEL-25531)   2024-04-02 22:40:56 -04:00
7 changed files with 507 additions and 1 deletion

.glibc.metadata (new file, 1 line)

@@ -0,0 +1 @@
7c3b8890a6346793b6334cc5f2fea5d437d307b8 glibc-2.34.tar.xz

glibc-RHEL-25046.patch (new file, 88 lines)

@@ -0,0 +1,88 @@
From dfb05f8e704edac70db38c4c8ee700769d91a413 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 16 Feb 2024 07:17:10 -0800
Subject: [PATCH] x86-64: Save APX registers in ld.so trampoline
Content-type: text/plain; charset=UTF-8
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
the ld.so trampoline.  This fixes BZ #31371.

Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386, which will
be used by the i386 _dl_tlsdesc_dynamic.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Conflicts:
sysdeps/x86/sysdep.h
(rebased for context and line numbers)
---
sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 6 deletions(-)
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 85d0a8c943..837fd28734 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -48,14 +48,54 @@
# define SHSTK_ENABLED 0
#endif
+/* The extended state feature IDs in the state component bitmap. */
+#define X86_XSTATE_X87_ID 0
+#define X86_XSTATE_SSE_ID 1
+#define X86_XSTATE_AVX_ID 2
+#define X86_XSTATE_BNDREGS_ID 3
+#define X86_XSTATE_BNDCFG_ID 4
+#define X86_XSTATE_K_ID 5
+#define X86_XSTATE_ZMM_H_ID 6
+#define X86_XSTATE_ZMM_ID 7
+#define X86_XSTATE_PKRU_ID 9
+#define X86_XSTATE_TILECFG_ID 17
+#define X86_XSTATE_TILEDATA_ID 18
+#define X86_XSTATE_APX_F_ID 19
+
+#ifdef __x86_64__
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
- aligned to 16 bytes for fxsave and 64 bytes for xsave. */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
-
-/* Save SSE, AVX, AVX512, mask and bound registers. */
-#define STATE_SAVE_MASK \
- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+ aligned to 16 bytes for fxsave and 64 bytes for xsave.
+
+ NB: It is non-zero because of the 128-byte red-zone. Some registers
+ are saved on stack without adjusting stack pointer first. When we
+ update stack pointer to allocate more space, we need to take the
+ red-zone into account. */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
+ registers are mutually exclusive. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID) \
+ | (1 << X86_XSTATE_ZMM_ID) \
+ | (1 << X86_XSTATE_APX_F_ID))
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
+ doesn't have a red-zone, use 0 here. */
+# define STATE_SAVE_OFFSET 0
+
+/* Save SSE, AVX, AVX512, mask and bound registers. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID))
+#endif
/* Constants for bits in __x86_string_control: */
--
2.39.3
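
A quick sanity check of the new x86-64 mask is to evaluate it outside the
tree.  A minimal standalone sketch (not part of the patch; the feature-ID
constants are copied from the hunk above):

#include <stdio.h>

#define X86_XSTATE_SSE_ID	1
#define X86_XSTATE_AVX_ID	2
#define X86_XSTATE_BNDREGS_ID	3
#define X86_XSTATE_K_ID		5
#define X86_XSTATE_ZMM_H_ID	6
#define X86_XSTATE_ZMM_ID	7
#define X86_XSTATE_APX_F_ID	19

int
main (void)
{
  unsigned int mask = ((1 << X86_XSTATE_SSE_ID)
		       | (1 << X86_XSTATE_AVX_ID)
		       | (1 << X86_XSTATE_BNDREGS_ID)
		       | (1 << X86_XSTATE_K_ID)
		       | (1 << X86_XSTATE_ZMM_H_ID)
		       | (1 << X86_XSTATE_ZMM_ID)
		       | (1 << X86_XSTATE_APX_F_ID));
  /* Bit 19 (APX_F) is the new addition; the mask evaluates to 0x800ee.  */
  printf ("STATE_SAVE_MASK = %#x\n", mask);
  return 0;
}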

glibc-RHEL-25531-1.patch (new file, 188 lines)

@@ -0,0 +1,188 @@
From a4c3f5f46e850c977cda81c251036475aab8313c Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 23 Nov 2023 14:29:14 -0300
Subject: [PATCH] elf: Add a way to check if tunable is set (BZ 27069)
Content-type: text/plain; charset=UTF-8
The patch adds two new macros, TUNABLE_GET_DEFAULT and TUNABLE_IS_INITIALIZED,
where the former gets the default value with a signature similar to
TUNABLE_GET, while the latter returns whether the tunable was set by
the environment variable.

Checked on x86_64-linux-gnu.
Reviewed-by: DJ Delorie <dj@redhat.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Conflicts:
elf/Versions
(removed to preserve ABI)
elf/dl-tunable-types.h
(line numbers)
scripts/gen-tunables.awk
(account for missing TUNABLE_SECLEVEL patch)
---
elf/dl-tunable-types.h | 1 +
elf/dl-tunables.c | 40 ++++++++++++++++++++++++++++++++++++++++
elf/dl-tunables.h | 28 ++++++++++++++++++++++++++++
elf/dl-tunables.list | 1 +
scripts/gen-tunables.awk | 4 ++--
5 files changed, 72 insertions(+), 2 deletions(-)
diff -rup a/elf/dl-tunable-types.h b/elf/dl-tunable-types.h
--- a/elf/dl-tunable-types.h 2021-08-01 21:33:43.000000000 -0400
+++ b/elf/dl-tunable-types.h 2024-03-26 18:23:22.211504813 -0400
@@ -61,6 +61,7 @@ struct _tunable
{
const char name[TUNABLE_NAME_MAX]; /* Internal name of the tunable. */
tunable_type_t type; /* Data type of the tunable. */
+ const tunable_val_t def; /* The default value. */
tunable_val_t val; /* The value. */
bool initialized; /* Flag to indicate that the tunable is
initialized. */
diff -rup a/elf/dl-tunables.c b/elf/dl-tunables.c
--- a/elf/dl-tunables.c 2024-03-26 18:21:10.090681748 -0400
+++ b/elf/dl-tunables.c 2024-03-26 18:23:22.214504923 -0400
@@ -152,6 +152,13 @@ tunable_initialize (tunable_t *cur, cons
do_tunable_update_val (cur, &val, NULL, NULL);
}
+bool
+__tunable_is_initialized (tunable_id_t id)
+{
+ return tunable_list[id].initialized;
+}
+rtld_hidden_def (__tunable_is_initialized)
+
void
__tunable_set_val (tunable_id_t id, tunable_val_t *valp, tunable_num_t *minp,
tunable_num_t *maxp)
@@ -399,6 +406,39 @@ __tunables_print (void)
}
}
+void
+__tunable_get_default (tunable_id_t id, void *valp)
+{
+ tunable_t *cur = &tunable_list[id];
+
+ switch (cur->type.type_code)
+ {
+ case TUNABLE_TYPE_UINT_64:
+ {
+ *((uint64_t *) valp) = (uint64_t) cur->def.numval;
+ break;
+ }
+ case TUNABLE_TYPE_INT_32:
+ {
+ *((int32_t *) valp) = (int32_t) cur->def.numval;
+ break;
+ }
+ case TUNABLE_TYPE_SIZE_T:
+ {
+ *((size_t *) valp) = (size_t) cur->def.numval;
+ break;
+ }
+ case TUNABLE_TYPE_STRING:
+ {
+ *((const char **)valp) = cur->def.strval;
+ break;
+ }
+ default:
+ __builtin_unreachable ();
+ }
+}
+rtld_hidden_def (__tunable_get_default)
+
/* Set the tunable value. This is called by the module that the tunable exists
in. */
void
diff -rup a/elf/dl-tunables.h b/elf/dl-tunables.h
--- a/elf/dl-tunables.h 2021-08-01 21:33:43.000000000 -0400
+++ b/elf/dl-tunables.h 2024-03-26 18:23:22.217505032 -0400
@@ -53,18 +53,26 @@ typedef void (*tunable_callback_t) (tuna
extern void __tunables_init (char **);
extern void __tunables_print (void);
+extern bool __tunable_is_initialized (tunable_id_t);
extern void __tunable_get_val (tunable_id_t, void *, tunable_callback_t);
extern void __tunable_set_val (tunable_id_t, tunable_val_t *, tunable_num_t *,
tunable_num_t *);
+extern void __tunable_get_default (tunable_id_t id, void *valp);
rtld_hidden_proto (__tunables_init)
rtld_hidden_proto (__tunables_print)
+rtld_hidden_proto (__tunable_is_initialized)
rtld_hidden_proto (__tunable_get_val)
rtld_hidden_proto (__tunable_set_val)
+rtld_hidden_proto (__tunable_get_default)
/* Define TUNABLE_GET and TUNABLE_SET in short form if TOP_NAMESPACE and
TUNABLE_NAMESPACE are defined. This is useful shorthand to get and set
tunables within a module. */
#if defined TOP_NAMESPACE && defined TUNABLE_NAMESPACE
+# define TUNABLE_IS_INITIALIZED(__id) \
+ TUNABLE_IS_INITIALIZED_FULL(TOP_NAMESPACE, TUNABLE_NAMESPACE, __id)
+# define TUNABLE_GET_DEFAULT(__id, __type) \
+ TUNABLE_GET_DEFAULT_FULL(TOP_NAMESPACE, TUNABLE_NAMESPACE,__id, __type)
# define TUNABLE_GET(__id, __type, __cb) \
TUNABLE_GET_FULL (TOP_NAMESPACE, TUNABLE_NAMESPACE, __id, __type, __cb)
# define TUNABLE_SET(__id, __val) \
@@ -73,6 +81,10 @@ rtld_hidden_proto (__tunable_set_val)
TUNABLE_SET_WITH_BOUNDS_FULL (TOP_NAMESPACE, TUNABLE_NAMESPACE, __id, \
__val, __min, __max)
#else
+# define TUNABLE_IS_INITIALIZED(__top, __ns, __id) \
+ TUNABLE_IS_INITIALIZED_FULL(__top, __ns, __id)
+# define TUNABLE_GET_DEFAULT(__top, __ns, __id, __type) \
+ TUNABLE_GET_DEFAULT_FULL(__top, __ns, __id, __type)
# define TUNABLE_GET(__top, __ns, __id, __type, __cb) \
TUNABLE_GET_FULL (__top, __ns, __id, __type, __cb)
# define TUNABLE_SET(__top, __ns, __id, __val) \
@@ -81,6 +93,22 @@ rtld_hidden_proto (__tunable_set_val)
TUNABLE_SET_WITH_BOUNDS_FULL (__top, __ns, __id, __val, __min, __max)
#endif
+/* Return whether the tunable was initialized by the environment variable. */
+#define TUNABLE_IS_INITIALIZED_FULL(__top, __ns, __id) \
+({ \
+ tunable_id_t id = TUNABLE_ENUM_NAME (__top, __ns, __id); \
+ __tunable_is_initialized (id); \
+})
+
+/* Return the default value of the tunable. */
+#define TUNABLE_GET_DEFAULT_FULL(__top, __ns, __id, __type) \
+({ \
+ tunable_id_t id = TUNABLE_ENUM_NAME (__top, __ns, __id); \
+ __type __ret; \
+ __tunable_get_default (id, &__ret); \
+ __ret; \
+})
+
/* Get and return a tunable value. If the tunable was set externally and __CB
is defined then call __CB before returning the value. */
# define TUNABLE_GET_FULL(__top, __ns, __id, __type, __cb) \
diff -rup a/elf/dl-tunables.list b/elf/dl-tunables.list
--- a/elf/dl-tunables.list 2024-03-26 18:21:09.664666196 -0400
+++ b/elf/dl-tunables.list 2024-03-26 18:23:22.220505142 -0400
@@ -20,6 +20,7 @@
# type: Defaults to STRING
# minval: Optional minimum acceptable value
# maxval: Optional maximum acceptable value
+# default: Optional default value (if not specified it will be 0 or "")
# env_alias: An alias environment variable
# security_level: Specify security level of the tunable for AT_SECURE binaries.
# Valid values are:
diff -rup a/scripts/gen-tunables.awk b/scripts/gen-tunables.awk
--- a/scripts/gen-tunables.awk 2024-03-26 18:21:09.523661049 -0400
+++ b/scripts/gen-tunables.awk 2024-03-26 18:34:45.385462341 -0400
@@ -236,8 +236,8 @@ END {
n = indices[2];
m = indices[3];
printf (" {TUNABLE_NAME_S(%s, %s, %s)", t, n, m)
- printf (", {TUNABLE_TYPE_%s, %s, %s}, {%s}, NULL, TUNABLE_SECLEVEL_%s, %s},\n",
- types[t,n,m], minvals[t,n,m], maxvals[t,n,m],
+ printf (", {TUNABLE_TYPE_%s, %s, %s}, {%s}, {%s}, NULL, TUNABLE_SECLEVEL_%s, %s},\n",
+ types[t,n,m], minvals[t,n,m], maxvals[t,n,m], default_val[t,n,m],
default_val[t,n,m], security_level[t,n,m], env_alias[t,n,m]);
}
print "};"
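
For context, here is a hedged sketch of how a module can combine the two
new macros once TOP_NAMESPACE and TUNABLE_NAMESPACE are defined; the
tunable name is glibc's real x86 one, but the helper itself is
hypothetical.  glibc-RHEL-25531-3.patch below uses TUNABLE_IS_INITIALIZED
in exactly this way.

#define TOP_NAMESPACE glibc
#define TUNABLE_NAMESPACE cpu
#include <elf/dl-tunables.h>

/* Hypothetical helper: honor an explicit GLIBC_TUNABLES setting, and
   fall back to the default recorded in dl-tunables.list otherwise.  */
static long int
get_rep_stosb_threshold (void)
{
  if (TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
    return TUNABLE_GET (x86_rep_stosb_threshold, long int, NULL);
  return TUNABLE_GET_DEFAULT (x86_rep_stosb_threshold, long int);
}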

glibc-RHEL-25531-2.patch (new file, 155 lines)

@@ -0,0 +1,155 @@
From 0c0d39fe4aeb0f69b26e76337c5dfd5530d5d44e Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 8 Feb 2024 10:08:38 -0300
Subject: [PATCH] x86: Fix Zen3/Zen4 ERMS selection (BZ 30994)
Content-type: text/plain; charset=UTF-8
The REP MOVSB usage in memcpy/memmove does not show much performance
improvement on Zen3/Zen4 cores compared to the vectorized loops.  Also,
as reported in BZ 30994, if the source is aligned and the destination is
not, the performance can be 20x slower.

The performance difference is noticeable with small buffer sizes, closer
to the lower bound limits where memcpy/memmove starts to use ERMS.  The
performance of REP MOVSB is similar to that of the vectorized instructions
at the size limit (the L2 cache).  Also, there is no drawback to multiple
cores sharing the cache.
Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Conflicts:
sysdeps/x86/dl-cacheinfo.h
(tweaked for changed context)
---
sysdeps/x86/dl-cacheinfo.h | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index d5101615e3..f34d12846c 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
long int data = -1;
long int shared = -1;
long int shared_per_thread = -1;
- long int core = -1;
unsigned int threads = 0;
unsigned long int level1_icache_size = -1;
unsigned long int level1_icache_linesize = -1;
@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (cpu_features->basic.kind == arch_kind_intel)
{
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
- core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
shared_per_thread = shared;
@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
= handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features);
level1_dcache_linesize
= handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features);
- level2_cache_size = core;
+ level2_cache_size
+ = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
level2_cache_assoc
= handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features);
level2_cache_linesize
@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level4_cache_size
= handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features);
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
+ level2_cache_size);
}
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
{
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
shared_per_thread = shared;
@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level1_dcache_size = data;
level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC);
level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE);
- level2_cache_size = core;
+ level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC);
level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE);
level3_cache_size = shared;
level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC);
level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE);
- get_common_cache_info (&shared, &shared_per_thread, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads,
+ level2_cache_size);
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
- core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level1_dcache_size = data;
level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC);
level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE);
- level2_cache_size = core;
+ level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);
level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC);
level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE);
level3_cache_size = shared;
@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (shared <= 0)
{
/* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
+ shared = level2_cache_size;
}
else if (cpu_features->basic.family < 0x17)
{
/* Account for exclusive L2 and L3 caches. */
- shared += core;
+ shared += level2_cache_size;
}
shared_per_thread = shared;
@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
rep_movsb_threshold = 2112;
+ /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
+ cases slower than the vectorized path (and for some alignments,
+ it is really slow, check BZ #30994). */
+ if (cpu_features->basic.kind == arch_kind_amd)
+ rep_movsb_threshold = non_temporal_threshold;
+
/* The default threshold to use Enhanced REP STOSB. */
unsigned long int rep_stosb_threshold = 2048;
@@ -1028,15 +1033,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
#endif
unsigned long int rep_movsb_stop_threshold;
- /* ERMS feature is implemented from AMD Zen3 architecture and it is
- performing poorly for data above L2 cache size. Henceforth, adding
- an upper bound threshold parameter to limit the usage of Enhanced
- REP MOVSB operations and setting its value to L2 cache size. */
- if (cpu_features->basic.kind == arch_kind_amd)
- rep_movsb_stop_threshold = core;
/* Setting the upper bound of ERMS to the computed value of
- non-temporal threshold for architectures other than AMD. */
- else
- rep_movsb_stop_threshold = non_temporal_threshold;
+ non-temporal threshold for all architectures. */
+ rep_movsb_stop_threshold = non_temporal_threshold;
cpu_features->data_cache_size = data;
cpu_features->shared_cache_size = shared;
--
2.39.3
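
The net effect of the hunks above is easiest to see as a size window:
REP MOVSB is used only for sizes in [rep_movsb_threshold,
rep_movsb_stop_threshold), and this patch sets both bounds to the
non-temporal threshold on AMD, leaving the window empty.  An illustrative
sketch (not glibc source) of that dispatch:

#include <stddef.h>

enum copy_path { COPY_VECTORIZED, COPY_REP_MOVSB, COPY_NON_TEMPORAL };

static enum copy_path
choose_copy_path (size_t size, size_t rep_movsb_threshold,
                  size_t rep_movsb_stop_threshold)
{
  if (size < rep_movsb_threshold)
    return COPY_VECTORIZED;    /* Small and medium copies.  */
  if (size < rep_movsb_stop_threshold)
    return COPY_REP_MOVSB;     /* Empty range on AMD after this patch.  */
  return COPY_NON_TEMPORAL;    /* At or above the non-temporal threshold.  */
}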

glibc-RHEL-25531-3.patch (new file, 34 lines)

@@ -0,0 +1,34 @@
From 272708884cb750f12f5c74a00e6620c19dc6d567 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 8 Feb 2024 10:08:39 -0300
Subject: [PATCH] x86: Do not prefer ERMS for memset on Zen3+
Content-type: text/plain; charset=UTF-8
For AMD Zen3+ architecture, the performance of the vectorized loop is
slightly better than ERMS.
Checked on x86_64-linux-gnu on Zen3.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86/dl-cacheinfo.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index f34d12846c..5a98f70364 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -1021,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
minimum value is fixed. */
rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold,
long int, NULL);
+ if (cpu_features->basic.kind == arch_kind_amd
+ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold))
+ /* For AMD Zen3+ architecture, the performance of the vectorized loop is
+ slightly better than ERMS. */
+ rep_stosb_threshold = SIZE_MAX;
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
--
2.39.3
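
Because the override above is guarded by TUNABLE_IS_INITIALIZED, it only
applies when the tunable is left unset; a user who wants REP STOSB back on
Zen3+ can still opt in explicitly, e.g. with
GLIBC_TUNABLES=glibc.cpu.x86_rep_stosb_threshold=2048 (2048 being the
default threshold mentioned in glibc-RHEL-25531-2.patch above).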

glibc-RHEL-25531-4.patch (new file, 29 lines)

@@ -0,0 +1,29 @@
From 491e55beab7457ed310a4a47496f4a333c5d1032 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 8 Feb 2024 10:08:40 -0300
Subject: [PATCH] x86: Expand the comment on when REP STOSB is used on memset
Content-type: text/plain; charset=UTF-8
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
---
sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 9984c3ca0f..97839a2248 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -21,7 +21,9 @@
2. If size is less than VEC, use integer register stores.
3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
- 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+ 5. On machines with the ERMS feature, if size is greater than or equal
+ to __x86_rep_stosb_threshold then REP STOSB will be used.
+ 6. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
4 VEC stores and store 4 * VEC at a time until done. */
#include <sysdep.h>
--
2.39.3
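
The numbered strategy in the updated comment reads naturally as
straight-line C.  An illustrative sketch (not glibc source; the names and
parameters are stand-ins for the assembler implementation's):

#include <stddef.h>

enum memset_path { SET_INTEGER_REGS, SET_2_VEC, SET_4_VEC, SET_REP_STOSB,
                   SET_VEC_LOOP };

static enum memset_path
choose_memset_path (size_t size, size_t vec_size, int have_erms,
                    size_t rep_stosb_threshold)
{
  if (size < vec_size)
    return SET_INTEGER_REGS;   /* Step 2: integer register stores.  */
  if (size <= 2 * vec_size)
    return SET_2_VEC;          /* Step 3: 2 VEC stores.  */
  if (size <= 4 * vec_size)
    return SET_4_VEC;          /* Step 4: 4 VEC stores.  */
  if (have_erms && size >= rep_stosb_threshold)
    return SET_REP_STOSB;      /* Step 5: REP STOSB with ERMS.  */
  return SET_VEC_LOOP;         /* Step 6: align and store 4 * VEC loops.  */
}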

glibc.spec

@@ -155,7 +155,7 @@ end \
Summary: The GNU libc libraries
Name: glibc
Version: %{glibcversion}
-Release: 103%{?dist}
+Release: 105%{?dist}
# In general, GPLv2+ is used by programs, LGPLv2+ is used for
# libraries.
@@ -812,6 +812,11 @@ Patch575: glibc-RHEL-23472.patch
Patch576: glibc-RHEL-20172-1.patch
Patch577: glibc-RHEL-20172-2.patch
Patch578: glibc-RHEL-21884.patch
+Patch579: glibc-RHEL-25531-1.patch
+Patch580: glibc-RHEL-25531-2.patch
+Patch581: glibc-RHEL-25531-3.patch
+Patch582: glibc-RHEL-25531-4.patch
+Patch583: glibc-RHEL-25046.patch
##############################################################################
# Continued list of core "glibc" package information:
@@ -2970,6 +2975,12 @@ update_gconv_modules_cache ()
%endif
%changelog
+* Wed Apr 3 2024 DJ Delorie <dj@redhat.com> - 2.34-105
+- x86-64: Save APX registers in ld.so trampoline (RHEL-25046)
+
+* Tue Mar 26 2024 DJ Delorie <dj@redhat.com> - 2.34-104
+- x86: Fix Zen3/Zen4 ERMS selection (RHEL-25531)
+
* Tue Mar 12 2024 Arjun Shankar <arjun@redhat.com> - 2.34-103
- malloc: Do not use MAP_NORESERVE to allocate heap segments (RHEL-21884)