From 0d5bc434145dd63a1f10f56c594a95648f0c7369 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Tue, 5 Aug 2025 16:30:24 +0200 Subject: [PATCH] x86_64, aarch64: More CPU output in ld.so --list-diagnostics (RHEL-107540) Resolves: RHEL-107540 --- glibc-RHEL-107540-1.patch | 262 +++++++++++++++++++ glibc-RHEL-107540-2.patch | 512 ++++++++++++++++++++++++++++++++++++++ glibc-RHEL-107540-3.patch | 146 +++++++++++ glibc.spec | 8 +- 4 files changed, 927 insertions(+), 1 deletion(-) create mode 100644 glibc-RHEL-107540-1.patch create mode 100644 glibc-RHEL-107540-2.patch create mode 100644 glibc-RHEL-107540-3.patch diff --git a/glibc-RHEL-107540-1.patch b/glibc-RHEL-107540-1.patch new file mode 100644 index 0000000..e3eedb7 --- /dev/null +++ b/glibc-RHEL-107540-1.patch @@ -0,0 +1,262 @@ +commit 5653ccd847f0cd3a98906e44c97c71d68652d326 +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + elf: Add CPU iteration support for future use in ld.so diagnostics + + Reviewed-by: Szabolcs Nagy + +diff --git a/elf/dl-iterate_cpu.h b/elf/dl-iterate_cpu.h +new file mode 100644 +index 0000000000000000..60db167b13e2d377 +--- /dev/null ++++ b/elf/dl-iterate_cpu.h +@@ -0,0 +1,136 @@ ++/* Iterate over all CPUs, for CPU-specific diagnostics. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef DL_ITERATE_CPU_H ++#define DL_ITERATE_CPU_H ++ ++#include ++#include ++ ++struct dl_iterate_cpu ++{ ++ /* Sequential iteration count, starting at 0. */ ++ unsigned int processor_index; ++ ++ /* Requested CPU. Can be -1 if affinity could not be set. */ ++ int requested_cpu; ++ ++ /* Observed current CPU. -1 if unavailable. */ ++ int actual_cpu; ++ ++ /* Observed node ID for the CPU. -1 if unavailable. */ ++ int actual_node; ++ ++ /* Internal fields to implement the iteration. */ ++ ++ /* Affinity as obtained by _dl_iterate_cpu_init, using ++ _dl_getaffinity. Space for 8,192 CPUs. */ ++ unsigned long int mask_reference[8192 / sizeof (unsigned long int) / 8]; ++ ++ /* This array is used by _dl_setaffinity calls. */ ++ unsigned long int mask_request[8192 / sizeof (unsigned long int) / 8]; ++ ++ /* Return value from the initial _dl_getaffinity call. */ ++ int length_reference; ++}; ++ ++static void ++_dl_iterate_cpu_init (struct dl_iterate_cpu *dic) ++{ ++ dic->length_reference ++ = _dl_getaffinity (dic->mask_reference, sizeof (dic->mask_reference)); ++ /* Prepare for the first _dl_iterate_cpu_next call. */ ++ dic->processor_index = -1; ++ dic->requested_cpu = -1; ++} ++ ++static bool ++_dl_iterate_cpu_next (struct dl_iterate_cpu *dic) ++{ ++ ++dic->processor_index; ++ ++ if (dic->length_reference > 0) ++ { ++ /* Search for the next CPU to switch to. */ ++ while (true) ++ { ++ ++dic->requested_cpu; ++ ++ /* Array index and bit number within the array. 
*/ ++ unsigned int long_index ++ = dic->requested_cpu / sizeof (unsigned long int) / 8; ++ unsigned int bit_index ++ = dic->requested_cpu % (sizeof (unsigned long int) * 8); ++ ++ if (long_index * sizeof (unsigned long int) >= dic->length_reference) ++ /* All possible CPUs have been covered. */ ++ return false; ++ ++ unsigned long int bit = 1UL << bit_index; ++ if (dic->mask_reference[long_index] & bit) ++ { ++ /* The CPU is available. Try to select it. */ ++ dic->mask_request[long_index] = bit; ++ if (_dl_setaffinity (dic->mask_request, ++ (long_index + 1) ++ * sizeof (unsigned long int)) < 0) ++ { ++ /* Record that we could not perform a CPU request. */ ++ dic->length_reference = -1; ++ ++ if (dic->processor_index > 0) ++ /* We already reported something. There is no need to ++ continue because the new data is probably not useful. */ ++ return false; ++ } ++ ++ /* Clear the bit in case the next iteration switches to the ++ next long value. */ ++ dic->mask_request[long_index] = 0; ++ ++ /* We found a CPU to run on. */ ++ break; ++ } ++ } ++ } ++ else ++ { ++ /* No way to set CPU affinity. Iterate just once. */ ++ if (dic->processor_index > 0) ++ return false; ++ } ++ ++ /* Fill in the actual CPU information. CPU pinning may not actually ++ be effective, depending on the container host. */ ++ unsigned int cpu, node; ++ if (_dl_getcpu (&cpu, &node) < 0) ++ { ++ /* No CPU information available. */ ++ dic->actual_cpu = -1; ++ dic->actual_node = -1; ++ } ++ else ++ { ++ dic->actual_cpu = cpu; ++ dic->actual_node = node; ++ } ++ ++ return true; ++} ++ ++#endif /* DL_ITERATE_CPU_H */ +diff --git a/sysdeps/generic/dl-affinity.h b/sysdeps/generic/dl-affinity.h +new file mode 100644 +index 0000000000000000..d117f737e98712e0 +--- /dev/null ++++ b/sysdeps/generic/dl-affinity.h +@@ -0,0 +1,54 @@ ++/* CPU affinity handling for the dynamic linker. Stub version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef DL_AFFINITY_H ++#define DL_AFFINITY_H ++ ++#include ++#include ++ ++/* On success, write the current CPU ID to *CPU, and the current node ++ ID to *NODE, and return 0. Return a negative error code on ++ failure. */ ++static inline int ++_dl_getcpu (unsigned int *cpu, unsigned int *node) ++{ ++ return -ENOSYS; ++} ++ ++/* On success, write CPU ID affinity bits for the current thread to ++ *BITS, which must be SIZE bytes long, and return the number of ++ bytes updated, a multiple of sizeof (unsigned long int). On ++ failure, return a negative error code. */ ++static int ++_dl_getaffinity (unsigned long int *bits, size_t size) ++{ ++ return -ENOSYS; ++} ++ ++/* Set the CPU affinity mask for the current thread to *BITS, using ++ the SIZE bytes from that array, which should be a multiple of ++ sizeof (unsigned long int). Return 0 on success, and a negative ++ error code on failure. 
*/ ++static int ++_dl_setaffinity (const unsigned long int *bits, size_t size) ++{ ++ return -ENOSYS; ++} ++ ++#endif /* DL_AFFINITY_H */ +diff --git a/sysdeps/unix/sysv/linux/dl-affinity.h b/sysdeps/unix/sysv/linux/dl-affinity.h +new file mode 100644 +index 0000000000000000..bbfede7750a3037f +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-affinity.h +@@ -0,0 +1,46 @@ ++/* CPU affinity handling for the dynamic linker. Linux version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* See sysdeps/generic/dl-affinity.h for documentation of these interfaces. */ ++ ++#ifndef DL_AFFINITY_H ++#define DL_AFFINITY_H ++ ++#include ++#include ++#include ++ ++static inline int ++_dl_getcpu (unsigned int *cpu, unsigned int *node) ++{ ++ return INTERNAL_SYSCALL_CALL (getcpu, cpu, node); ++} ++ ++static int ++_dl_getaffinity (unsigned long int *bits, size_t size) ++{ ++ return INTERNAL_SYSCALL_CALL (sched_getaffinity, /* TID */ 0, size, bits); ++} ++ ++static int ++_dl_setaffinity (const unsigned long int *bits, size_t size) ++{ ++ return INTERNAL_SYSCALL_CALL (sched_setaffinity, /* TID */ 0, size, bits); ++} ++ ++#endif /* DL_AFFINITY_H */ diff --git a/glibc-RHEL-107540-2.patch b/glibc-RHEL-107540-2.patch new file mode 100644 index 0000000..0401b32 --- /dev/null +++ b/glibc-RHEL-107540-2.patch @@ -0,0 +1,512 @@ +commit 7a430f40c46acfa7ce4c3bff193b278c190b2efc +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + x86: Add generic CPUID data dumper to ld.so --list-diagnostics + + This is surprisingly difficult to implement if the goal is to produce + reasonably sized output. With the current approaches to output + compression (suppressing zeros and repeated results between CPUs, + folding ranges of identical subleaves, dealing with the %ecx + reflection issue), the output is less than 600 KiB even for systems + with 256 logical CPUs. + + Reviewed-by: H.J. Lu + +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +index 5a93de0df6b60214..d71f7a30d6f46808 100644 +--- a/manual/dynlink.texi ++++ b/manual/dynlink.texi +@@ -262,7 +262,90 @@ The value of the @code{dczid_el0} system register on the processor + @item x86.cpu_features.@dots{} + These items are specific to the i386 and x86-64 architectures. They + reflect supported CPU features and information on cache geometry, mostly +-collected using the @code{CPUID} instruction. ++collected using the CPUID instruction. ++ ++@item x86.processor[@var{index}].@dots{} ++These are additional items for the i386 and x86-64 architectures, as ++described below. They mostly contain raw data from the CPUID ++instruction. The probes are performed for each active CPU for the ++@code{ld.so} process, and data for different probed CPUs receives a ++uniqe @var{index} value. Some CPUID data is expected to differ from CPU ++core to CPU core. 
In some cases, CPUs are not correctly initialized and ++indicate the presence of different feature sets. ++ ++@item x86.processor[@var{index}].requested=@var{kernel-cpu} ++The kernel is told to run the subsequent probing on the CPU numbered ++@var{kernel-cpu}. The values @var{kernel-cpu} and @var{index} can be ++distinct if there are gaps in the process CPU affinity mask. This line ++is not included if CPU affinity mask information is not available. ++ ++@item x86.processor[@var{index}].observed=@var{kernel-cpu} ++This line reports the kernel CPU number @var{kernel-cpu} on which the ++probing code initially ran. If the CPU number cannot be obtained, ++this line is not printed. ++ ++@item x86.processor[@var{index}].observed_node=@var{node} ++This reports the observed NUMA node number, as reported by the ++@code{getcpu} system call. If this information cannot be obtained, this ++line is not printed. ++ ++@item x86.processor[@var{index}].cpuid_leaves=@var{count} ++This line indicates that @var{count} distinct CPUID leaves were ++encountered. (This reflects internal @code{ld.so} storage space, it ++does not directly correspond to @code{CPUID} enumeration ranges.) ++ ++@item x86.processor[@var{index}].ecx_limit=@var{value} ++The CPUID data extraction code uses a brute-force approach to enumerate ++subleaves (see the @samp{.subleaf_eax} lines below). The last ++@code{%rcx} value used in a CPUID query on this probed CPU was ++@var{value}. ++ ++@item x86.processor[@var{index}].cpuid.eax[@var{query_eax}].eax=@var{eax} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ebx=@var{ebx} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ecx=@var{ecx} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].edx=@var{edx} ++These lines report the register contents after executing the CPUID ++instruction with @samp{%rax == @var{query_eax}} and @samp{%rcx == 0} (a ++@dfn{leaf}). For the first probed CPU (with a zero @var{index}), only ++leaves with non-zero register contents are reported. For subsequent ++CPUs, only leaves whose register contents differs from the previously ++probed CPUs (with @var{index} one less) are reported. ++ ++Basic and extended leaves are reported using the same syntax. This ++means there is a large jump in @var{query_eax} for the first reported ++extended leaf. ++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].eax=@var{eax} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ebx=@var{ebx} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx=@var{ecx} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].edx=@var{edx} ++This is similar to the leaves above, but for a @dfn{subleaf}. For ++subleaves, the CPUID instruction is executed with @samp{%rax == ++@var{query_eax}} and @samp{%rcx == @var{query_ecx}}, so the result ++depends on both register values. The same rules about filtering zero ++and identical results apply. ++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].until_ecx=@var{ecx_limit} ++Some CPUID results are the same regardless the @var{query_ecx} value. ++If this situation is detected, a line with the @samp{.until_ecx} ++selector ins included, and this indicates that the CPUID register ++contents is the same for @code{%rcx} values between @var{query_ecx} ++and @var{ecx_limit} (inclusive). 
++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx_query_mask=0xff ++This line indicates that in an @samp{.until_ecx} range, the CPUID ++instruction preserved the lowested 8 bits of the input @code{%rcx} in ++the output @code{%rcx} registers. Otherwise, the subleaves in the range ++have identical values. This special treatment is necessary to report ++compact range information in case such copying occurs (because the ++subleaves would otherwise be all different). ++ ++@item x86.processor[@var{index}].xgetbv.ecx[@var{query_ecx}]=@var{result} ++This line shows the 64-bit @var{result} value in the @code{%rdx:%rax} ++register pair after executing the XGETBV instruction with @code{%rcx} ++set to @var{query_ecx}. Zero values and values matching the previously ++probed CPU are omitted. Nothing is printed if the system does not ++support the XGETBV instruction. + @end table + + @node Dynamic Linker Introspection +diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c +index 9a3e0ec8b9214a9c..7fe2c2dfd6668223 100644 +--- a/sysdeps/x86/dl-diagnostics-cpu.c ++++ b/sysdeps/x86/dl-diagnostics-cpu.c +@@ -17,7 +17,18 @@ + . */ + + #include ++ ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include ++ ++/* The generic CPUID dumping code. */ ++static void _dl_diagnostics_cpuid (void); + + static void + print_cpu_features_value (const char *label, uint64_t value) +@@ -124,4 +135,377 @@ _dl_diagnostics_cpu (void) + + sizeof (cpu_features->cachesize_non_temporal_divisor) + == sizeof (*cpu_features), + "last cpu_features field has been printed"); ++ ++ _dl_diagnostics_cpuid (); ++} ++ ++/* The following code implements a generic CPUID dumper that tries to ++ gather CPUID data without knowing about CPUID implementation ++ details. */ ++ ++/* Register arguments to CPUID. Multiple ECX subleaf values yielding ++ the same result are combined, to shorten the output. Both ++ identical matches (EAX to EDX are the same) and matches where EAX, ++ EBX, EDX, and ECX are equal except in the lower byte, which must ++ match the query ECX value. The latter is needed to compress ranges ++ on CPUs which preserve the lowest byte in ECX if an unknown leaf is ++ queried. */ ++struct cpuid_query ++{ ++ unsigned int eax; ++ unsigned ecx_first; ++ unsigned ecx_last; ++ bool ecx_preserves_query_byte; ++}; ++ ++/* Single integer value that can be used for sorting/ordering ++ comparisons. Uses Q->eax and Q->ecx_first only because ecx_last is ++ always greater than the previous ecx_first value and less than the ++ subsequent one. */ ++static inline unsigned long long int ++cpuid_query_combined (struct cpuid_query *q) ++{ ++ /* ecx can be -1 (that is, ~0U). If this happens, this the only ecx ++ value for this eax value, so the ordering does not matter. */ ++ return ((unsigned long long int) q->eax << 32) | (unsigned int) q->ecx_first; ++}; ++ ++/* Used for differential reporting of zero/non-zero values. */ ++static const struct cpuid_registers cpuid_registers_zero; ++ ++/* Register arguments to CPUID paired with the results that came back. */ ++struct cpuid_query_result ++{ ++ struct cpuid_query q; ++ struct cpuid_registers r; ++}; ++ ++/* During a first enumeration pass, we try to collect data for ++ cpuid_initial_subleaf_limit subleaves per leaf/EAX value. If we run ++ out of space, we try once more with applying the lower limit. 
*/ ++enum { cpuid_main_leaf_limit = 128 }; ++enum { cpuid_initial_subleaf_limit = 512 }; ++enum { cpuid_subleaf_limit = 32 }; ++ ++/* Offset of the extended leaf area. */ ++enum {cpuid_extended_leaf_offset = 0x80000000 }; ++ ++/* Collected CPUID data. Everything is stored in a statically sized ++ array that is sized so that the second pass will collect some data ++ for all leaves, after the limit is applied. On the second pass, ++ ecx_limit is set to cpuid_subleaf_limit. */ ++struct cpuid_collected_data ++{ ++ unsigned int used; ++ unsigned int ecx_limit; ++ uint64_t xgetbv_ecx_0; ++ struct cpuid_query_result qr[cpuid_main_leaf_limit ++ * 2 * cpuid_subleaf_limit]; ++}; ++ ++/* Fill in the result of a CPUID query. Returns true if there is ++ room, false if nothing could be stored. */ ++static bool ++_dl_diagnostics_cpuid_store (struct cpuid_collected_data *ccd, ++ unsigned eax, int ecx) ++{ ++ if (ccd->used >= array_length (ccd->qr)) ++ return false; ++ ++ /* Tentatively fill in the next value. */ ++ __cpuid_count (eax, ecx, ++ ccd->qr[ccd->used].r.eax, ++ ccd->qr[ccd->used].r.ebx, ++ ccd->qr[ccd->used].r.ecx, ++ ccd->qr[ccd->used].r.edx); ++ ++ /* If the ECX subleaf is next subleaf after the previous one (for ++ the same leaf), and the values are the same, merge the result ++ with the already-stored one. Do this before skipping zero ++ leaves, which avoids artifiacts for ECX == 256 queries. */ ++ if (ccd->used > 0 ++ && ccd->qr[ccd->used - 1].q.eax == eax ++ && ccd->qr[ccd->used - 1].q.ecx_last + 1 == ecx) ++ { ++ /* Exact match of the previous result. Ignore the value of ++ ecx_preserves_query_byte if this is a singleton range so far ++ because we can treat ECX as fixed if the same value repeats. */ ++ if ((!ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte ++ || (ccd->qr[ccd->used - 1].q.ecx_first ++ == ccd->qr[ccd->used - 1].q.ecx_last)) ++ && memcmp (&ccd->qr[ccd->used - 1].r, &ccd->qr[ccd->used].r, ++ sizeof (ccd->qr[ccd->used].r)) == 0) ++ { ++ ccd->qr[ccd->used - 1].q.ecx_last = ecx; ++ /* ECX is now fixed because the same value has been observed ++ twice, even if we had a low-byte match before. */ ++ ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte = false; ++ return true; ++ } ++ /* Match except for the low byte in ECX, which must match the ++ incoming ECX value. */ ++ if (ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte ++ && (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff) ++ && ccd->qr[ccd->used].r.eax == ccd->qr[ccd->used - 1].r.eax ++ && ccd->qr[ccd->used].r.ebx == ccd->qr[ccd->used - 1].r.ebx ++ && ((ccd->qr[ccd->used].r.ecx & 0xffffff00) ++ == (ccd->qr[ccd->used - 1].r.ecx & 0xffffff00)) ++ && ccd->qr[ccd->used].r.edx == ccd->qr[ccd->used - 1].r.edx) ++ { ++ ccd->qr[ccd->used - 1].q.ecx_last = ecx; ++ return true; ++ } ++ } ++ ++ /* Do not store zero results. All-zero values usually mean that the ++ subleaf is unsupported. */ ++ if (ccd->qr[ccd->used].r.eax == 0 ++ && ccd->qr[ccd->used].r.ebx == 0 ++ && ccd->qr[ccd->used].r.ecx == 0 ++ && ccd->qr[ccd->used].r.edx == 0) ++ return true; ++ ++ /* The result needs to be stored. Fill in the query parameters and ++ consume the storage. */ ++ ccd->qr[ccd->used].q.eax = eax; ++ ccd->qr[ccd->used].q.ecx_first = ecx; ++ ccd->qr[ccd->used].q.ecx_last = ecx; ++ ccd->qr[ccd->used].q.ecx_preserves_query_byte ++ = (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff); ++ ++ccd->used; ++ return true; ++} ++ ++/* Collected CPUID data into *CCD. If LIMIT, apply per-leaf limits to ++ avoid exceeding the pre-allocated space. 
Return true if all data ++ could be stored, false if the retrying without a limit is ++ requested. */ ++static bool ++_dl_diagnostics_cpuid_collect_1 (struct cpuid_collected_data *ccd, bool limit) ++{ ++ ccd->used = 0; ++ ccd->ecx_limit ++ = (limit ? cpuid_subleaf_limit : cpuid_initial_subleaf_limit) - 1; ++ _dl_diagnostics_cpuid_store (ccd, 0x00, 0x00); ++ if (ccd->used == 0) ++ /* CPUID reported all 0. Should not happen. */ ++ return true; ++ unsigned int maximum_leaf = ccd->qr[0x00].r.eax; ++ if (limit && maximum_leaf >= cpuid_main_leaf_limit) ++ maximum_leaf = cpuid_main_leaf_limit - 1; ++ ++ for (unsigned int eax = 1; eax <= maximum_leaf; ++eax) ++ { ++ for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx) ++ if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx)) ++ return false; ++ } ++ ++ if (!_dl_diagnostics_cpuid_store (ccd, cpuid_extended_leaf_offset, 0x00)) ++ return false; ++ maximum_leaf = ccd->qr[ccd->used - 1].r.eax; ++ if (maximum_leaf < cpuid_extended_leaf_offset) ++ /* No extended CPUID information. */ ++ return true; ++ if (limit ++ && maximum_leaf - cpuid_extended_leaf_offset >= cpuid_main_leaf_limit) ++ maximum_leaf = cpuid_extended_leaf_offset + cpuid_main_leaf_limit - 1; ++ for (unsigned int eax = cpuid_extended_leaf_offset + 1; ++ eax <= maximum_leaf; ++eax) ++ { ++ for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx) ++ if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx)) ++ return false; ++ } ++ return true; ++} ++ ++/* Call _dl_diagnostics_cpuid_collect_1 twice if necessary, the ++ second time with the limit applied. */ ++static void ++_dl_diagnostics_cpuid_collect (struct cpuid_collected_data *ccd) ++{ ++ if (!_dl_diagnostics_cpuid_collect_1 (ccd, false)) ++ _dl_diagnostics_cpuid_collect_1 (ccd, true); ++ ++ /* Re-use the result of the official feature probing here. */ ++ const struct cpu_features *cpu_features = __get_cpu_features (); ++ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) ++ { ++ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ ccd->xgetbv_ecx_0 = ((uint64_t) xcrhigh << 32) + xcrlow; ++ } ++ else ++ ccd->xgetbv_ecx_0 = 0; ++} ++ ++/* Print a CPUID register value (passed as REG_VALUE) if it differs ++ from the expected REG_REFERENCE value. PROCESSOR_INDEX is the ++ process sequence number (always starting at zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_print_reg (unsigned int processor_index, ++ const struct cpuid_query *q, ++ const char *reg_label, unsigned int reg_value, ++ bool subleaf) ++{ ++ if (subleaf) ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].%s=0x%x\n", ++ processor_index, q->eax, q->ecx_first, reg_label, reg_value); ++ else ++ _dl_printf ("x86.processor[0x%x].cpuid.eax[0x%x].%s=0x%x\n", ++ processor_index, q->eax, reg_label, reg_value); ++} ++ ++/* Print CPUID result values in *RESULT for the query in ++ CCD->qr[CCD_IDX]. PROCESSOR_INDEX is the process sequence number ++ (always starting at zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_print_query (unsigned int processor_index, ++ struct cpuid_collected_data *ccd, ++ unsigned int ccd_idx, ++ const struct cpuid_registers *result) ++{ ++ /* Treat this as a value if subleaves if ecx isn't zero (maybe ++ within the [ecx_fist, ecx_last] range), or if eax matches its ++ neighbors. If the range is [0, ecx_limit], then the subleaves ++ are not distinct (independently of ecx_preserves_query_byte), ++ so do not report them separately. 
*/ ++ struct cpuid_query *q = &ccd->qr[ccd_idx].q; ++ bool subleaf = (q->ecx_first > 0 ++ || (q->ecx_first != q->ecx_last ++ && !(q->ecx_first == 0 && q->ecx_last == ccd->ecx_limit)) ++ || (ccd_idx > 0 && q->eax == ccd->qr[ccd_idx - 1].q.eax) ++ || (ccd_idx + 1 < ccd->used ++ && q->eax == ccd->qr[ccd_idx + 1].q.eax)); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "eax", result->eax, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "ebx", result->ebx, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "ecx", result->ecx, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "edx", result->edx, ++ subleaf); ++ ++ if (subleaf && q->ecx_first != q->ecx_last) ++ { ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].until_ecx=0x%x\n", ++ processor_index, q->eax, q->ecx_first, q->ecx_last); ++ if (q->ecx_preserves_query_byte) ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].ecx_query_mask=0xff\n", ++ processor_index, q->eax, q->ecx_first); ++ } ++} ++ ++/* Perform differential reporting of the data in *CURRENT against ++ *BASE. REQUESTED_CPU is the kernel CPU ID the thread was ++ configured to run on, or -1 if no configuration was possible. ++ PROCESSOR_INDEX is the process sequence number (always starting at ++ zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_report (struct dl_iterate_cpu *dci, ++ struct cpuid_collected_data *current, ++ struct cpuid_collected_data *base) ++{ ++ if (dci->requested_cpu >= 0) ++ _dl_printf ("x86.processor[0x%x].requested=0x%x\n", ++ dci->processor_index, dci->requested_cpu); ++ if (dci->actual_cpu >= 0) ++ _dl_printf ("x86.processor[0x%x].observed=0x%x\n", ++ dci->processor_index, dci->actual_cpu); ++ if (dci->actual_node >= 0) ++ _dl_printf ("x86.processor[0x%x].observed_node=0x%x\n", ++ dci->processor_index, dci->actual_node); ++ ++ _dl_printf ("x86.processor[0x%x].cpuid_leaves=0x%x\n", ++ dci->processor_index, current->used); ++ _dl_printf ("x86.processor[0x%x].ecx_limit=0x%x\n", ++ dci->processor_index, current->ecx_limit); ++ ++ unsigned int base_idx = 0; ++ for (unsigned int current_idx = 0; current_idx < current->used; ++ ++current_idx) ++ { ++ /* Report missing data on the current CPU as 0. */ ++ unsigned long long int current_query ++ = cpuid_query_combined (¤t->qr[current_idx].q); ++ while (base_idx < base->used ++ && cpuid_query_combined (&base->qr[base_idx].q) < current_query) ++ { ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ base, base_idx, ++ &cpuid_registers_zero); ++ ++base_idx; ++ } ++ ++ if (base_idx < base->used ++ && cpuid_query_combined (&base->qr[base_idx].q) == current_query) ++ { ++ _Static_assert (sizeof (struct cpuid_registers) == 4 * 4, ++ "no padding in struct cpuid_registers"); ++ if (current->qr[current_idx].q.ecx_last ++ != base->qr[base_idx].q.ecx_last ++ || memcmp (¤t->qr[current_idx].r, ++ &base->qr[base_idx].r, ++ sizeof (struct cpuid_registers)) != 0) ++ /* The ECX range or the values have changed. Show the ++ new values. */ ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ current, current_idx, ++ ¤t->qr[current_idx].r); ++ ++base_idx; ++ } ++ else ++ /* Data is absent in the base reference. Report the new data. */ ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ current, current_idx, ++ ¤t->qr[current_idx].r); ++ } ++ ++ if (current->xgetbv_ecx_0 != base->xgetbv_ecx_0) ++ { ++ /* Re-use the 64-bit printing routine. 
*/ ++ _dl_printf ("x86.processor[0x%x].", dci->processor_index); ++ _dl_diagnostics_print_labeled_value ("xgetbv.ecx[0x0]", ++ current->xgetbv_ecx_0); ++ } ++} ++ ++static void ++_dl_diagnostics_cpuid (void) ++{ ++#if !HAS_CPUID ++ /* CPUID is not supported, so there is nothing to dump. */ ++ if (__get_cpuid_max (0, 0) == 0) ++ return; ++#endif ++ ++ struct dl_iterate_cpu dic; ++ _dl_iterate_cpu_init (&dic); ++ ++ /* Two copies of the data are used. Data is written to the index ++ (dic.processor_index & 1). The previous version against which the ++ data dump is reported is at index !(processor_index & 1). */ ++ struct cpuid_collected_data ccd[2]; ++ ++ /* The initial data is presumed to be all zero. Zero results are ++ not recorded. */ ++ ccd[1].used = 0; ++ ccd[1].xgetbv_ecx_0 = 0; ++ ++ /* Run the CPUID probing on a specific CPU. There are expected ++ differences for encoding core IDs and topology information in ++ CPUID output, but some firmware/kernel bugs also may result in ++ asymmetric data across CPUs in some cases. */ ++ while (_dl_iterate_cpu_next (&dic)) ++ { ++ _dl_diagnostics_cpuid_collect (&ccd[dic.processor_index & 1]); ++ _dl_diagnostics_cpuid_report ++ (&dic, &ccd[dic.processor_index & 1], ++ &ccd[!(dic.processor_index & 1)]); ++ } + } diff --git a/glibc-RHEL-107540-3.patch b/glibc-RHEL-107540-3.patch new file mode 100644 index 0000000..5a0231c --- /dev/null +++ b/glibc-RHEL-107540-3.patch @@ -0,0 +1,146 @@ +commit f8d8b1b1e6d3b8b93f224efc796b7ea083fdb83f +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + aarch64: Enhanced CPU diagnostics for ld.so + + This prints some information from struct cpu_features, and the midr_el1 + and dczid_el0 system register contents on every CPU. + + Reviewed-by: Szabolcs Nagy + +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +index f2f2341818fb9d18..d71f7a30d6f46808 100644 +--- a/manual/dynlink.texi ++++ b/manual/dynlink.texi +@@ -224,6 +224,40 @@ reflect adjustment by @theglibc{}. + These Linux-specific items show the values of @code{struct utsname}, as + reported by the @code{uname} function. @xref{Platform Type}. + ++@item aarch64.cpu_features.@dots{} ++These items are specific to the AArch64 architectures. They report data ++@theglibc{} uses to activate conditionally supported features such as ++BTI and MTE, and to select alternative function implementations. ++ ++@item aarch64.processor[@var{index}].@dots{} ++These are additional items for the AArch64 architecture and are ++described below. ++ ++@item aarch64.processor[@var{index}].requested=@var{kernel-cpu} ++The kernel is told to run the subsequent probing on the CPU numbered ++@var{kernel-cpu}. The values @var{kernel-cpu} and @var{index} can be ++distinct if there are gaps in the process CPU affinity mask. This line ++is not included if CPU affinity mask information is not available. ++ ++@item aarch64.processor[@var{index}].observed=@var{kernel-cpu} ++This line reports the kernel CPU number @var{kernel-cpu} on which the ++probing code initially ran. If the CPU number cannot be obtained, ++this line is not printed. ++ ++@item aarch64.processor[@var{index}].observed_node=@var{node} ++This reports the observed NUMA node number, as reported by the ++@code{getcpu} system call. If this information cannot be obtained, this ++line is not printed. ++ ++@item aarch64.processor[@var{index}].midr_el1=@var{value} ++The value of the @code{midr_el1} system register on the processor ++@var{index}. 
This line is only printed if the kernel indicates that ++this system register is supported. ++ ++@item aarch64.processor[@var{index}].dczid_el0=@var{value} ++The value of the @code{dczid_el0} system register on the processor ++@var{index}. ++ + @cindex CPUID (diagnostics) + @item x86.cpu_features.@dots{} + These items are specific to the i386 and x86-64 architectures. They +diff --git a/sysdeps/aarch64/dl-diagnostics-cpu.c b/sysdeps/aarch64/dl-diagnostics-cpu.c +new file mode 100644 +index 0000000000000000..e037e6ea8c43df72 +--- /dev/null ++++ b/sysdeps/aarch64/dl-diagnostics-cpu.c +@@ -0,0 +1,84 @@ ++/* Print CPU diagnostics data in ld.so. AArch64 version. ++ Copyright (C) 2021-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void ++print_cpu_features_value (const char *label, uint64_t value) ++{ ++ _dl_printf ("aarch64.cpu_features."); ++ _dl_diagnostics_print_labeled_value (label, value); ++} ++ ++static void ++print_per_cpu_value (const struct dl_iterate_cpu *dic, ++ const char *label, uint64_t value) ++{ ++ _dl_printf ("aarch64.processor[0x%x].", dic->processor_index); ++ _dl_diagnostics_print_labeled_value (label, value); ++} ++ ++void ++_dl_diagnostics_cpu (void) ++{ ++ print_cpu_features_value ("bti", GLRO (dl_aarch64_cpu_features).bti); ++ print_cpu_features_value ("midr_el1", ++ GLRO (dl_aarch64_cpu_features).midr_el1); ++ print_cpu_features_value ("mops", GLRO (dl_aarch64_cpu_features).mops); ++ print_cpu_features_value ("mte_state", ++ GLRO (dl_aarch64_cpu_features).mte_state); ++ print_cpu_features_value ("prefer_sve_ifuncs", ++ GLRO (dl_aarch64_cpu_features).prefer_sve_ifuncs); ++ print_cpu_features_value ("sve", GLRO (dl_aarch64_cpu_features).sve); ++ print_cpu_features_value ("zva_size", ++ GLRO (dl_aarch64_cpu_features).zva_size); ++ ++ struct dl_iterate_cpu dic; ++ _dl_iterate_cpu_init (&dic); ++ ++ while (_dl_iterate_cpu_next (&dic)) ++ { ++ if (dic.requested_cpu >= 0) ++ _dl_printf ("aarch64.processor[0x%x].requested=0x%x\n", ++ dic.processor_index, dic.requested_cpu); ++ if (dic.actual_cpu >= 0) ++ _dl_printf ("aarch64.processor[0x%x].observed=0x%x\n", ++ dic.processor_index, dic.actual_cpu); ++ if (dic.actual_node >= 0) ++ _dl_printf ("aarch64.processor[0x%x].observed_node=0x%x\n", ++ dic.processor_index, dic.actual_node); ++ ++ if (GLRO (dl_hwcap) & HWCAP_CPUID) ++ { ++ uint64_t midr_el1; ++ asm ("mrs %0, midr_el1" : "=r" (midr_el1)); ++ print_per_cpu_value (&dic, "midr_el1", midr_el1); ++ } ++ ++ { ++ uint64_t dczid_el0; ++ asm ("mrs %0, dczid_el0" : "=r" (dczid_el0)); ++ print_per_cpu_value (&dic, "dczid_el0", dczid_el0); ++ } ++ } ++} diff --git a/glibc.spec b/glibc.spec index ed4fe29..c810d91 100644 --- a/glibc.spec +++ b/glibc.spec @@ -145,7 +145,7 @@ Version: %{glibcversion} # - It allows using the Release 
number without the %%dist tag in the dependency # generator to make the generated requires interchangeable between Rawhide # and ELN (.elnYY < .fcXX). -%global baserelease 48 +%global baserelease 49 Release: %{baserelease}%{?dist} # Licenses: @@ -588,6 +588,9 @@ Patch264: glibc-RHEL-95246-2.patch Patch265: glibc-RHEL-105324.patch Patch266: glibc-RHEL-72564-1.patch Patch267: glibc-RHEL-72564-2.patch +Patch268: glibc-RHEL-107540-1.patch +Patch269: glibc-RHEL-107540-2.patch +Patch270: glibc-RHEL-107540-3.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2600,6 +2603,9 @@ update_gconv_modules_cache () %endif %changelog +* Tue Aug 05 2025 Florian Weimer - 2.39-49 +- x86_64, aarch64: More CPU output in ld.so --list-diagnostics (RHEL-107540) + * Thu Jul 31 2025 Frédéric Bérat - 2.39-48 - Add support for new IBM Z17 hardware in glibc (RHEL-72564)
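
For readers who want to experiment with the probing pattern that the new elf/dl-iterate_cpu.h introduces, here is a minimal user-space sketch of the same idea. It is an illustration only, not part of the patch: it uses the public sched_getaffinity/sched_setaffinity wrappers and the getcpu system call instead of the internal _dl_* helpers, and it is limited to the standard cpu_set_t size rather than the fixed 8,192-CPU mask ld.so reserves.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int
main (void)
{
  cpu_set_t reference;
  if (sched_getaffinity (0, sizeof (reference), &reference) != 0)
    {
      perror ("sched_getaffinity");
      return 1;
    }

  unsigned int index = 0;
  for (unsigned int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
    {
      if (!CPU_ISSET (cpu, &reference))
        continue;

      /* Ask the kernel to run the subsequent probing on this CPU.  */
      cpu_set_t request;
      CPU_ZERO (&request);
      CPU_SET (cpu, &request);
      if (sched_setaffinity (0, sizeof (request), &request) != 0)
        continue;  /* ld.so instead falls back to probing just once.  */

      /* Observe where we actually ran; pinning may be ineffective in
         some container environments.  */
      unsigned int actual_cpu, actual_node;
      if (syscall (SYS_getcpu, &actual_cpu, &actual_node, NULL) == 0)
        printf ("processor[0x%x].requested=0x%x observed=0x%x node=0x%x\n",
                index, cpu, actual_cpu, actual_node);

      ++index;
    }

  /* Restore the original affinity mask.  */
  sched_setaffinity (0, sizeof (reference), &reference);
  return 0;
}

Once the patched glibc is installed, the per-CPU records added by these patches appear in the ld.so --list-diagnostics output under the x86.processor[...] and aarch64.processor[...] prefixes documented in the manual hunks above.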
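
Similarly, the raw data behind the x86.processor[...].cpuid lines comes from ordinary CPUID queries. The following stand-alone sketch uses the compiler-provided <cpuid.h> helpers (the same __cpuid_count macro the dumper uses) to show basic leaf enumeration and the all-zero suppression described in the manual text; the 0x20 cap is an arbitrary bound for the example, not one of the patch's constants, and no subleaf or range compression is attempted here.

#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  /* Highest supported basic leaf, as reported by CPUID leaf 0.  */
  unsigned int max_leaf = __get_cpuid_max (0, NULL);

  for (unsigned int eax = 0; eax <= max_leaf && eax < 0x20; ++eax)
    {
      unsigned int a, b, c, d;
      __cpuid_count (eax, 0, a, b, c, d);

      /* Like the ld.so dumper, skip all-zero results: they usually
         mean the leaf is unsupported.  */
      if ((a | b | c | d) != 0)
        printf ("cpuid.eax[0x%x]: eax=0x%x ebx=0x%x ecx=0x%x edx=0x%x\n",
                eax, a, b, c, d);
    }
  return 0;
}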