From 2ea0ac63f1ebcebe8c74687472215bdb519a9905 Mon Sep 17 00:00:00 2001 From: Patsy Griffin Date: Thu, 6 Jun 2024 14:24:58 -0400 Subject: [PATCH] Enhance ld.so --list-diagnostics on aarch64 Resolves: RHEL-22165 --- glibc-RHEL-22165-1.patch | 229 ++++++++++++++++++ glibc-RHEL-22165-2.patch | 74 ++++++ glibc-RHEL-22165-3.patch | 144 +++++++++++ glibc-RHEL-22165-4.patch | 510 +++++++++++++++++++++++++++++++++++++++ glibc-RHEL-22165-5.patch | 262 ++++++++++++++++++++ glibc.spec | 11 +- 6 files changed, 1229 insertions(+), 1 deletion(-) create mode 100644 glibc-RHEL-22165-1.patch create mode 100644 glibc-RHEL-22165-2.patch create mode 100644 glibc-RHEL-22165-3.patch create mode 100644 glibc-RHEL-22165-4.patch create mode 100644 glibc-RHEL-22165-5.patch diff --git a/glibc-RHEL-22165-1.patch b/glibc-RHEL-22165-1.patch new file mode 100644 index 0000000..3133966 --- /dev/null +++ b/glibc-RHEL-22165-1.patch @@ -0,0 +1,229 @@ +commit f21962ddfc8bb23e92597da1f98e313dbde11cc1 +Author: Florian Weimer +Date: Fri Aug 25 14:15:28 2023 +0200 + + manual: Document ld.so --list-diagnostics output + + Reviewed-by: Adhemerval Zanella + +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +index 45bf5a5b55..df41c56bfc 100644 +--- a/manual/dynlink.texi ++++ b/manual/dynlink.texi +@@ -13,9 +13,216 @@ as plugins) later at run time. + Dynamic linkers are sometimes called @dfn{dynamic loaders}. + + @menu ++* Dynamic Linker Invocation:: Explicit invocation of the dynamic linker. + * Dynamic Linker Introspection:: Interfaces for querying mapping information. + @end menu + ++@node Dynamic Linker Invocation ++ ++@cindex program interpreter ++When a dynamically linked program starts, the operating system ++automatically loads the dynamic linker along with the program. ++@Theglibc{} also supports invoking the dynamic linker explicitly to ++launch a program. This command uses the implied dynamic linker ++(also sometimes called the @dfn{program interpreter}): ++ ++@smallexample ++sh -c 'echo "Hello, world!"' ++@end smallexample ++ ++This command specifies the dynamic linker explicitly: ++ ++@smallexample ++ld.so /bin/sh -c 'echo "Hello, world!"' ++@end smallexample ++ ++Note that @command{ld.so} does not search the @env{PATH} environment ++variable, so the full file name of the executable needs to be specified. ++ ++The @command{ld.so} program supports various options. Options start ++@samp{--} and need to come before the program that is being launched. ++Some of the supported options are listed below. ++ ++@table @code ++@item --list-diagnostics ++Print system diagnostic information in a machine-readable format. ++@xref{Dynamic Linker Diagnostics}. ++@end table ++ ++@menu ++* Dynamic Linker Diagnostics:: Obtaining system diagnostic information. ++@end menu ++ ++@node Dynamic Linker Diagnostics ++@section Dynamic Linker Diagnostics ++@cindex diagnostics (dynamic linker) ++ ++The @samp{ld.so --list-diagnostics} produces machine-readable ++diagnostics output. This output contains system data that affects the ++behavior of @theglibc{}, and potentially application behavior as well. ++ ++The exact set of diagnostic items can change between releases of ++@theglibc{}. The output format itself is not expected to change ++radically. ++ ++The following table shows some example lines that can be written by the ++diagnostics command. ++ ++@table @code ++@item dl_pagesize=0x1000 ++The system page size is 4096 bytes. 
++ ++@item env[0x14]="LANG=en_US.UTF-8" ++This item indicates that the 21st environment variable at process ++startup contains a setting for @code{LANG}. ++ ++@item env_filtered[0x22]="DISPLAY" ++The 35th environment variable is @code{DISPLAY}. Its value is not ++included in the output for privacy reasons because it is not recognized ++as harmless by the diagnostics code. ++ ++@item path.prefix="/usr" ++This means that @theglibc{} was configured with @code{--prefix=/usr}. ++ ++@item path.system_dirs[0x0]="/lib64/" ++@itemx path.system_dirs[0x1]="/usr/lib64/" ++The built-in dynamic linker search path contains two directories, ++@code{/lib64} and @code{/usr/lib64}. ++@end table ++ ++@subsection Dynamic Linker Diagnostics Output Format ++ ++As seen above, diagnostic lines assign values (integers or strings) to a ++sequence of labeled subscripts, separated by @samp{.}. Some subscripts ++have integer indices associated with them. The subscript indices are ++not necessarily contiguous or small, so an associative array should be ++used to store them. Currently, all integers fit into the 64-bit ++unsigned integer range. Every access path to a value has a fixed type ++(string or integer) independent of subscript index values. Likewise, ++whether a subscript is indexed does not depend on previous indices (but ++may depend on previous subscript labels). ++ ++A syntax description in ABNF (RFC 5234) follows. Note that ++@code{%x30-39} denotes the range of decimal digits. Diagnostic output ++lines are expected to match the @code{line} production. ++ ++@c ABNF-START ++@smallexample ++HEXDIG = %x30-39 / %x61-6f ; lowercase a-f only ++ALPHA = %x41-5a / %x61-7a / %x7f ; letters and underscore ++ALPHA-NUMERIC = ALPHA / %x30-39 / "_" ++DQUOTE = %x22 ; " ++ ++; Numbers are always hexadecimal and use a 0x prefix. ++hex-value-prefix = %x30 %x78 ++hex-value = hex-value-prefix 1*HEXDIG ++ ++; Strings use octal escape sequences and \\, \". ++string-char = %x20-21 / %x23-5c / %x5d-7e ; printable but not "\ ++string-quoted-octal = %x30-33 2*2%x30-37 ++string-quoted = "\" ("\" / DQUOTE / string-quoted-octal) ++string-value = DQUOTE *(string-char / string-quoted) DQUOTE ++ ++value = hex-value / string-value ++ ++label = ALPHA *ALPHA-NUMERIC ++index = "[" hex-value "]" ++subscript = label [index] ++ ++line = subscript *("." subscript) "=" value ++@end smallexample ++ ++@subsection Dynamic Linker Diagnostics Values ++ ++As mentioned above, the set of diagnostics may change between ++@theglibc{} releases. Nevertheless, the following table documents a few ++common diagnostic items. All numbers are in hexadecimal, with a ++@samp{0x} prefix. ++ ++@table @code ++@item dl_dst_lib=@var{string} ++The @code{$LIB} dynamic string token expands to @var{string}. ++ ++@cindex HWCAP (diagnostics) ++@item dl_hwcap=@var{integer} ++@itemx dl_hwcap2=@var{integer} ++The HWCAP and HWCAP2 values, as returned for @code{getauxval}, and as ++used in other places depending on the architecture. ++ ++@cindex page size (diagnostics) ++@item dl_pagesize=@var{integer} ++The system page size is @var{integer} bytes. ++ ++@item dl_platform=@var{string} ++The @code{$PLATFORM} dynamic string token expands to @var{string}. ++ ++@item dso.libc=@var{string} ++This is the soname of the shared @code{libc} object that is part of ++@theglibc{}. On most architectures, this is @code{libc.so.6}. ++ ++@item env[@var{index}]=@var{string} ++@itemx env_filtered[@var{index}]=@var{string} ++An environment variable from the process environment. 
The integer ++@var{index} is the array index in the environment array. Variables ++under @code{env} include the variable value after the @samp{=} (assuming ++that it was present), variables under @code{env_filtered} do not. ++ ++@item path.prefix=@var{string} ++This indicates that @theglibc{} was configured using ++@samp{--prefix=@var{string}}. ++ ++@item path.sysconfdir=@var{string} ++@Theglibc{} was configured (perhaps implicitly) with ++@samp{--sysconfdir=@var{string}} (typically @code{/etc}). ++ ++@item path.system_dirs[@var{index}]=@var{string} ++These items list the elements of the built-in array that describes the ++default library search path. The value @var{string} is a directory file ++name with a trailing @samp{/}. ++ ++@item path.rtld=@var{string} ++This string indicates the application binary interface (ABI) file name ++of the run-time dynamic linker. ++ ++@item version.release="stable" ++@itemx version.release="development" ++The value @code{"stable"} indicates that this build of @theglibc{} is ++from a release branch. Releases labeled as @code{"development"} are ++unreleased development versions. ++ ++@cindex version (diagnostics) ++@item version.version="@var{major}.@var{minor}" ++@itemx version.version="@var{major}.@var{minor}.9000" ++@Theglibc{} version. Development releases end in @samp{.9000}. ++ ++@cindex auxiliary vector (diagnostics) ++@item auxv[@var{index}].a_type=@var{type} ++@itemx auxv[@var{index}].a_val=@var{integer} ++@itemx auxv[@var{index}].a_val_string=@var{string} ++An entry in the auxiliary vector (specific to Linux). The values ++@var{type} (an integer) and @var{integer} correspond to the members of ++@code{struct auxv}. If the value is a string, @code{a_val_string} is ++used instead of @code{a_val}, so that values have consistent types. ++ ++The @code{AT_HWCAP} and @code{AT_HWCAP2} values in this output do not ++reflect adjustment by @theglibc{}. ++ ++@item uname.sysname=@var{string} ++@itemx uname.nodename=@var{string} ++@itemx uname.release=@var{string} ++@itemx uname.version=@var{string} ++@itemx uname.machine=@var{string} ++@itemx uname.domain=@var{string} ++These Linux-specific items show the values of @code{struct utsname}, as ++reported by the @code{uname} function. @xref{Platform Type}. ++ ++@cindex CPUID (diagnostics) ++@item x86.cpu_features.@dots{} ++These items are specific to the i386 and x86-64 architectures. They ++reflect supported CPU features and information on cache geometry, mostly ++collected using the @code{CPUID} instruction. ++@end table ++ + @node Dynamic Linker Introspection + @section Dynamic Linker Introspection + diff --git a/glibc-RHEL-22165-2.patch b/glibc-RHEL-22165-2.patch new file mode 100644 index 0000000..62c2711 --- /dev/null +++ b/glibc-RHEL-22165-2.patch @@ -0,0 +1,74 @@ +commit d99609a3eb8bc96c3af841fd35294a679e0fea7f +Author: Florian Weimer +Date: Wed Sep 6 18:37:21 2023 +0200 + + manual: Fix ld.so diagnostics menu/section structure + + And shorten the section/node names a bit, so that the menu + entries become easier to read. 
+ + Texinfo 6.5 fails to process the previous structure: + + ./dynlink.texi:56: warning: node `Dynamic Linker Introspection' is + next for `Dynamic Linker Diagnostics' in sectioning but not in menu + ./dynlink.texi:56: warning: node up `Dynamic Linker Diagnostics' + in menu `Dynamic Linker Invocation' and + in sectioning `Dynamic Linker' differ + ./dynlink.texi:1: node `Dynamic Linker' lacks menu item for + `Dynamic Linker Diagnostics' despite being its Up target + ./dynlink.texi:226: warning: node prev `Dynamic Linker Introspection' in menu `Dynamic Linker Invocation' + and in sectioning `Dynamic Linker Diagnostics' differ + + Texinfo 7.0.2 does not report an error. + + This fixes commit f21962ddfc8bb23e92597da1f98e313dbde11cc1 + ("manual: Document ld.so --list-diagnostics output"). + + Reviewed-by: Szabolcs Nagy + +diff --git a/manual/dynlink.texi b/manual/dynlink.texi +index df41c56bfc..06a6c15533 100644 +--- a/manual/dynlink.texi ++++ b/manual/dynlink.texi +@@ -18,6 +18,7 @@ Dynamic linkers are sometimes called @dfn{dynamic loaders}. + @end menu + + @node Dynamic Linker Invocation ++@section Dynamic Linker Invocation + + @cindex program interpreter + When a dynamically linked program starts, the operating system +@@ -54,7 +55,7 @@ Print system diagnostic information in a machine-readable format. + @end menu + + @node Dynamic Linker Diagnostics +-@section Dynamic Linker Diagnostics ++@subsection Dynamic Linker Diagnostics + @cindex diagnostics (dynamic linker) + + The @samp{ld.so --list-diagnostics} produces machine-readable +@@ -90,7 +91,13 @@ The built-in dynamic linker search path contains two directories, + @code{/lib64} and @code{/usr/lib64}. + @end table + +-@subsection Dynamic Linker Diagnostics Output Format ++@menu ++* Dynamic Linker Diagnostics Format:: Format of ld.so output. ++* Dynamic Linker Diagnostics Values:: Data contain in ld.so output. ++@end menu ++ ++@node Dynamic Linker Diagnostics Format ++@subsubsection Dynamic Linker Diagnostics Format + + As seen above, diagnostic lines assign values (integers or strings) to a + sequence of labeled subscripts, separated by @samp{.}. Some subscripts +@@ -132,7 +139,8 @@ subscript = label [index] + line = subscript *("." subscript) "=" value + @end smallexample + +-@subsection Dynamic Linker Diagnostics Values ++@node Dynamic Linker Diagnostics Values ++@subsubsection Dynamic Linker Diagnostics Values + + As mentioned above, the set of diagnostics may change between + @theglibc{} releases. Nevertheless, the following table documents a few diff --git a/glibc-RHEL-22165-3.patch b/glibc-RHEL-22165-3.patch new file mode 100644 index 0000000..392e746 --- /dev/null +++ b/glibc-RHEL-22165-3.patch @@ -0,0 +1,144 @@ +commit f8d8b1b1e6d3b8b93f224efc796b7ea083fdb83f +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + aarch64: Enhanced CPU diagnostics for ld.so + + This prints some information from struct cpu_features, and the midr_el1 + and dczid_el0 system register contents on every CPU. + + Reviewed-by: Szabolcs Nagy + + Modified for RHEL by: Patsy Griffin + Diagnostics for the cpu_features mops and prefer_sve_ifuncs are not + currently supported on aarch64. + +diff -Nrup a/manual/dynlink.texi b/manual/dynlink.texi +--- a/manual/dynlink.texi 2024-05-31 20:55:08.238959456 -0400 ++++ b/manual/dynlink.texi 2024-05-31 20:55:41.298121623 -0400 +@@ -224,6 +224,40 @@ reflect adjustment by @theglibc{}. + These Linux-specific items show the values of @code{struct utsname}, as + reported by the @code{uname} function. 
@xref{Platform Type}. + ++@item aarch64.cpu_features.@dots{} ++These items are specific to the AArch64 architectures. They report data ++@theglibc{} uses to activate conditionally supported features such as ++BTI and MTE, and to select alternative function implementations. ++ ++@item aarch64.processor[@var{index}].@dots{} ++These are additional items for the AArch64 architecture and are ++described below. ++ ++@item aarch64.processor[@var{index}].requested=@var{kernel-cpu} ++The kernel is told to run the subsequent probing on the CPU numbered ++@var{kernel-cpu}. The values @var{kernel-cpu} and @var{index} can be ++distinct if there are gaps in the process CPU affinity mask. This line ++is not included if CPU affinity mask information is not available. ++ ++@item aarch64.processor[@var{index}].observed=@var{kernel-cpu} ++This line reports the kernel CPU number @var{kernel-cpu} on which the ++probing code initially ran. If the CPU number cannot be obtained, ++this line is not printed. ++ ++@item aarch64.processor[@var{index}].observed_node=@var{node} ++This reports the observed NUMA node number, as reported by the ++@code{getcpu} system call. If this information cannot be obtained, this ++line is not printed. ++ ++@item aarch64.processor[@var{index}].midr_el1=@var{value} ++The value of the @code{midr_el1} system register on the processor ++@var{index}. This line is only printed if the kernel indicates that ++this system register is supported. ++ ++@item aarch64.processor[@var{index}].dczid_el0=@var{value} ++The value of the @code{dczid_el0} system register on the processor ++@var{index}. ++ + @cindex CPUID (diagnostics) + @item x86.cpu_features.@dots{} + These items are specific to the i386 and x86-64 architectures. They +diff -Nrup a/sysdeps/aarch64/dl-diagnostics-cpu.c b/sysdeps/aarch64/dl-diagnostics-cpu.c +--- a/sysdeps/aarch64/dl-diagnostics-cpu.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/sysdeps/aarch64/dl-diagnostics-cpu.c 2024-05-31 20:57:23.536623129 -0400 +@@ -0,0 +1,81 @@ ++/* Print CPU diagnostics data in ld.so. AArch64 version. ++ Copyright (C) 2021-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void ++print_cpu_features_value (const char *label, uint64_t value) ++{ ++ _dl_printf ("aarch64.cpu_features."); ++ _dl_diagnostics_print_labeled_value (label, value); ++} ++ ++static void ++print_per_cpu_value (const struct dl_iterate_cpu *dic, ++ const char *label, uint64_t value) ++{ ++ _dl_printf ("aarch64.processor[0x%x].", dic->processor_index); ++ _dl_diagnostics_print_labeled_value (label, value); ++} ++ ++void ++_dl_diagnostics_cpu (void) ++{ ++ print_cpu_features_value ("bti", GLRO (dl_aarch64_cpu_features).bti); ++ print_cpu_features_value ("midr_el1", ++ GLRO (dl_aarch64_cpu_features).midr_el1); ++ print_cpu_features_value ("mte_state", ++ GLRO (dl_aarch64_cpu_features).mte_state); ++ print_cpu_features_value ("sve", GLRO (dl_aarch64_cpu_features).sve); ++ print_cpu_features_value ("zva_size", ++ GLRO (dl_aarch64_cpu_features).zva_size); ++ ++ struct dl_iterate_cpu dic; ++ _dl_iterate_cpu_init (&dic); ++ ++ while (_dl_iterate_cpu_next (&dic)) ++ { ++ if (dic.requested_cpu >= 0) ++ _dl_printf ("aarch64.processor[0x%x].requested=0x%x\n", ++ dic.processor_index, dic.requested_cpu); ++ if (dic.actual_cpu >= 0) ++ _dl_printf ("aarch64.processor[0x%x].observed=0x%x\n", ++ dic.processor_index, dic.actual_cpu); ++ if (dic.actual_node >= 0) ++ _dl_printf ("aarch64.processor[0x%x].observed_node=0x%x\n", ++ dic.processor_index, dic.actual_node); ++ ++ if (GLRO (dl_hwcap) & HWCAP_CPUID) ++ { ++ uint64_t midr_el1; ++ asm ("mrs %0, midr_el1" : "=r" (midr_el1)); ++ print_per_cpu_value (&dic, "midr_el1", midr_el1); ++ } ++ ++ { ++ uint64_t dczid_el0; ++ asm ("mrs %0, dczid_el0" : "=r" (dczid_el0)); ++ print_per_cpu_value (&dic, "dczid_el0", dczid_el0); ++ } ++ } ++} diff --git a/glibc-RHEL-22165-4.patch b/glibc-RHEL-22165-4.patch new file mode 100644 index 0000000..0335b6f --- /dev/null +++ b/glibc-RHEL-22165-4.patch @@ -0,0 +1,510 @@ +commit 7a430f40c46acfa7ce4c3bff193b278c190b2efc +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + x86: Add generic CPUID data dumper to ld.so --list-diagnostics + + This is surprisingly difficult to implement if the goal is to produce + reasonably sized output. With the current approaches to output + compression (suppressing zeros and repeated results between CPUs, + folding ranges of identical subleaves, dealing with the %ecx + reflection issue), the output is less than 600 KiB even for systems + with 256 logical CPUs. + + Reviewed-by: H.J. Lu + +diff -Nrup a/manual/dynlink.texi b/manual/dynlink.texi +--- a/manual/dynlink.texi 2024-06-03 08:42:17.011026573 -0400 ++++ b/manual/dynlink.texi 2024-06-03 08:55:18.607771972 -0400 +@@ -262,7 +262,90 @@ The value of the @code{dczid_el0} system + @item x86.cpu_features.@dots{} + These items are specific to the i386 and x86-64 architectures. They + reflect supported CPU features and information on cache geometry, mostly +-collected using the @code{CPUID} instruction. ++collected using the CPUID instruction. ++ ++@item x86.processor[@var{index}].@dots{} ++These are additional items for the i386 and x86-64 architectures, as ++described below. They mostly contain raw data from the CPUID ++instruction. The probes are performed for each active CPU for the ++@code{ld.so} process, and data for different probed CPUs receives a ++uniqe @var{index} value. Some CPUID data is expected to differ from CPU ++core to CPU core. In some cases, CPUs are not correctly initialized and ++indicate the presence of different feature sets. 
++ ++@item x86.processor[@var{index}].requested=@var{kernel-cpu} ++The kernel is told to run the subsequent probing on the CPU numbered ++@var{kernel-cpu}. The values @var{kernel-cpu} and @var{index} can be ++distinct if there are gaps in the process CPU affinity mask. This line ++is not included if CPU affinity mask information is not available. ++ ++@item x86.processor[@var{index}].observed=@var{kernel-cpu} ++This line reports the kernel CPU number @var{kernel-cpu} on which the ++probing code initially ran. If the CPU number cannot be obtained, ++this line is not printed. ++ ++@item x86.processor[@var{index}].observed_node=@var{node} ++This reports the observed NUMA node number, as reported by the ++@code{getcpu} system call. If this information cannot be obtained, this ++line is not printed. ++ ++@item x86.processor[@var{index}].cpuid_leaves=@var{count} ++This line indicates that @var{count} distinct CPUID leaves were ++encountered. (This reflects internal @code{ld.so} storage space, it ++does not directly correspond to @code{CPUID} enumeration ranges.) ++ ++@item x86.processor[@var{index}].ecx_limit=@var{value} ++The CPUID data extraction code uses a brute-force approach to enumerate ++subleaves (see the @samp{.subleaf_eax} lines below). The last ++@code{%rcx} value used in a CPUID query on this probed CPU was ++@var{value}. ++ ++@item x86.processor[@var{index}].cpuid.eax[@var{query_eax}].eax=@var{eax} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ebx=@var{ebx} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].ecx=@var{ecx} ++@itemx x86.processor[@var{index}].cpuid.eax[@var{query_eax}].edx=@var{edx} ++These lines report the register contents after executing the CPUID ++instruction with @samp{%rax == @var{query_eax}} and @samp{%rcx == 0} (a ++@dfn{leaf}). For the first probed CPU (with a zero @var{index}), only ++leaves with non-zero register contents are reported. For subsequent ++CPUs, only leaves whose register contents differs from the previously ++probed CPUs (with @var{index} one less) are reported. ++ ++Basic and extended leaves are reported using the same syntax. This ++means there is a large jump in @var{query_eax} for the first reported ++extended leaf. ++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].eax=@var{eax} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ebx=@var{ebx} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx=@var{ecx} ++@itemx x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].edx=@var{edx} ++This is similar to the leaves above, but for a @dfn{subleaf}. For ++subleaves, the CPUID instruction is executed with @samp{%rax == ++@var{query_eax}} and @samp{%rcx == @var{query_ecx}}, so the result ++depends on both register values. The same rules about filtering zero ++and identical results apply. ++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].until_ecx=@var{ecx_limit} ++Some CPUID results are the same regardless the @var{query_ecx} value. ++If this situation is detected, a line with the @samp{.until_ecx} ++selector ins included, and this indicates that the CPUID register ++contents is the same for @code{%rcx} values between @var{query_ecx} ++and @var{ecx_limit} (inclusive). 
++ ++@item x86.processor[@var{index}].cpuid.subleaf_eax[@var{query_eax}].ecx[@var{query_ecx}].ecx_query_mask=0xff ++This line indicates that in an @samp{.until_ecx} range, the CPUID ++instruction preserved the lowested 8 bits of the input @code{%rcx} in ++the output @code{%rcx} registers. Otherwise, the subleaves in the range ++have identical values. This special treatment is necessary to report ++compact range information in case such copying occurs (because the ++subleaves would otherwise be all different). ++ ++@item x86.processor[@var{index}].xgetbv.ecx[@var{query_ecx}]=@var{result} ++This line shows the 64-bit @var{result} value in the @code{%rdx:%rax} ++register pair after executing the XGETBV instruction with @code{%rcx} ++set to @var{query_ecx}. Zero values and values matching the previously ++probed CPU are omitted. Nothing is printed if the system does not ++support the XGETBV instruction. + @end table + + @node Dynamic Linker Introspection +diff -Nrup a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c +--- a/sysdeps/x86/dl-diagnostics-cpu.c 2024-06-03 08:42:16.825025689 -0400 ++++ b/sysdeps/x86/dl-diagnostics-cpu.c 2024-06-03 09:49:47.528510916 -0400 +@@ -17,7 +17,18 @@ + . */ + + #include ++ ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include ++ ++/* The generic CPUID dumping code. */ ++static void _dl_diagnostics_cpuid (void); + + static void + print_cpu_features_value (const char *label, uint64_t value) +@@ -119,4 +130,377 @@ _dl_diagnostics_cpu (void) + "last cpu_features field has been printed"); + print_cpu_features_value ("cachesize_non_temporal_divisor", + __rtld_global_ro_cachesize_non_temporal_divisor); ++ ++ _dl_diagnostics_cpuid (); ++} ++ ++/* The following code implements a generic CPUID dumper that tries to ++ gather CPUID data without knowing about CPUID implementation ++ details. */ ++ ++/* Register arguments to CPUID. Multiple ECX subleaf values yielding ++ the same result are combined, to shorten the output. Both ++ identical matches (EAX to EDX are the same) and matches where EAX, ++ EBX, EDX, and ECX are equal except in the lower byte, which must ++ match the query ECX value. The latter is needed to compress ranges ++ on CPUs which preserve the lowest byte in ECX if an unknown leaf is ++ queried. */ ++struct cpuid_query ++{ ++ unsigned int eax; ++ unsigned ecx_first; ++ unsigned ecx_last; ++ bool ecx_preserves_query_byte; ++}; ++ ++/* Single integer value that can be used for sorting/ordering ++ comparisons. Uses Q->eax and Q->ecx_first only because ecx_last is ++ always greater than the previous ecx_first value and less than the ++ subsequent one. */ ++static inline unsigned long long int ++cpuid_query_combined (struct cpuid_query *q) ++{ ++ /* ecx can be -1 (that is, ~0U). If this happens, this the only ecx ++ value for this eax value, so the ordering does not matter. */ ++ return ((unsigned long long int) q->eax << 32) | (unsigned int) q->ecx_first; ++}; ++ ++/* Used for differential reporting of zero/non-zero values. */ ++static const struct cpuid_registers cpuid_registers_zero; ++ ++/* Register arguments to CPUID paired with the results that came back. */ ++struct cpuid_query_result ++{ ++ struct cpuid_query q; ++ struct cpuid_registers r; ++}; ++ ++/* During a first enumeration pass, we try to collect data for ++ cpuid_initial_subleaf_limit subleaves per leaf/EAX value. If we run ++ out of space, we try once more with applying the lower limit. 
*/ ++enum { cpuid_main_leaf_limit = 128 }; ++enum { cpuid_initial_subleaf_limit = 512 }; ++enum { cpuid_subleaf_limit = 32 }; ++ ++/* Offset of the extended leaf area. */ ++enum {cpuid_extended_leaf_offset = 0x80000000 }; ++ ++/* Collected CPUID data. Everything is stored in a statically sized ++ array that is sized so that the second pass will collect some data ++ for all leaves, after the limit is applied. On the second pass, ++ ecx_limit is set to cpuid_subleaf_limit. */ ++struct cpuid_collected_data ++{ ++ unsigned int used; ++ unsigned int ecx_limit; ++ uint64_t xgetbv_ecx_0; ++ struct cpuid_query_result qr[cpuid_main_leaf_limit ++ * 2 * cpuid_subleaf_limit]; ++}; ++ ++/* Fill in the result of a CPUID query. Returns true if there is ++ room, false if nothing could be stored. */ ++static bool ++_dl_diagnostics_cpuid_store (struct cpuid_collected_data *ccd, ++ unsigned eax, int ecx) ++{ ++ if (ccd->used >= array_length (ccd->qr)) ++ return false; ++ ++ /* Tentatively fill in the next value. */ ++ __cpuid_count (eax, ecx, ++ ccd->qr[ccd->used].r.eax, ++ ccd->qr[ccd->used].r.ebx, ++ ccd->qr[ccd->used].r.ecx, ++ ccd->qr[ccd->used].r.edx); ++ ++ /* If the ECX subleaf is next subleaf after the previous one (for ++ the same leaf), and the values are the same, merge the result ++ with the already-stored one. Do this before skipping zero ++ leaves, which avoids artifiacts for ECX == 256 queries. */ ++ if (ccd->used > 0 ++ && ccd->qr[ccd->used - 1].q.eax == eax ++ && ccd->qr[ccd->used - 1].q.ecx_last + 1 == ecx) ++ { ++ /* Exact match of the previous result. Ignore the value of ++ ecx_preserves_query_byte if this is a singleton range so far ++ because we can treat ECX as fixed if the same value repeats. */ ++ if ((!ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte ++ || (ccd->qr[ccd->used - 1].q.ecx_first ++ == ccd->qr[ccd->used - 1].q.ecx_last)) ++ && memcmp (&ccd->qr[ccd->used - 1].r, &ccd->qr[ccd->used].r, ++ sizeof (ccd->qr[ccd->used].r)) == 0) ++ { ++ ccd->qr[ccd->used - 1].q.ecx_last = ecx; ++ /* ECX is now fixed because the same value has been observed ++ twice, even if we had a low-byte match before. */ ++ ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte = false; ++ return true; ++ } ++ /* Match except for the low byte in ECX, which must match the ++ incoming ECX value. */ ++ if (ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte ++ && (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff) ++ && ccd->qr[ccd->used].r.eax == ccd->qr[ccd->used - 1].r.eax ++ && ccd->qr[ccd->used].r.ebx == ccd->qr[ccd->used - 1].r.ebx ++ && ((ccd->qr[ccd->used].r.ecx & 0xffffff00) ++ == (ccd->qr[ccd->used - 1].r.ecx & 0xffffff00)) ++ && ccd->qr[ccd->used].r.edx == ccd->qr[ccd->used - 1].r.edx) ++ { ++ ccd->qr[ccd->used - 1].q.ecx_last = ecx; ++ return true; ++ } ++ } ++ ++ /* Do not store zero results. All-zero values usually mean that the ++ subleaf is unsupported. */ ++ if (ccd->qr[ccd->used].r.eax == 0 ++ && ccd->qr[ccd->used].r.ebx == 0 ++ && ccd->qr[ccd->used].r.ecx == 0 ++ && ccd->qr[ccd->used].r.edx == 0) ++ return true; ++ ++ /* The result needs to be stored. Fill in the query parameters and ++ consume the storage. */ ++ ccd->qr[ccd->used].q.eax = eax; ++ ccd->qr[ccd->used].q.ecx_first = ecx; ++ ccd->qr[ccd->used].q.ecx_last = ecx; ++ ccd->qr[ccd->used].q.ecx_preserves_query_byte ++ = (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff); ++ ++ccd->used; ++ return true; ++} ++ ++/* Collected CPUID data into *CCD. If LIMIT, apply per-leaf limits to ++ avoid exceeding the pre-allocated space. 
Return true if all data ++ could be stored, false if the retrying without a limit is ++ requested. */ ++static bool ++_dl_diagnostics_cpuid_collect_1 (struct cpuid_collected_data *ccd, bool limit) ++{ ++ ccd->used = 0; ++ ccd->ecx_limit ++ = (limit ? cpuid_subleaf_limit : cpuid_initial_subleaf_limit) - 1; ++ _dl_diagnostics_cpuid_store (ccd, 0x00, 0x00); ++ if (ccd->used == 0) ++ /* CPUID reported all 0. Should not happen. */ ++ return true; ++ unsigned int maximum_leaf = ccd->qr[0x00].r.eax; ++ if (limit && maximum_leaf >= cpuid_main_leaf_limit) ++ maximum_leaf = cpuid_main_leaf_limit - 1; ++ ++ for (unsigned int eax = 1; eax <= maximum_leaf; ++eax) ++ { ++ for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx) ++ if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx)) ++ return false; ++ } ++ ++ if (!_dl_diagnostics_cpuid_store (ccd, cpuid_extended_leaf_offset, 0x00)) ++ return false; ++ maximum_leaf = ccd->qr[ccd->used - 1].r.eax; ++ if (maximum_leaf < cpuid_extended_leaf_offset) ++ /* No extended CPUID information. */ ++ return true; ++ if (limit ++ && maximum_leaf - cpuid_extended_leaf_offset >= cpuid_main_leaf_limit) ++ maximum_leaf = cpuid_extended_leaf_offset + cpuid_main_leaf_limit - 1; ++ for (unsigned int eax = cpuid_extended_leaf_offset + 1; ++ eax <= maximum_leaf; ++eax) ++ { ++ for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx) ++ if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx)) ++ return false; ++ } ++ return true; ++} ++ ++/* Call _dl_diagnostics_cpuid_collect_1 twice if necessary, the ++ second time with the limit applied. */ ++static void ++_dl_diagnostics_cpuid_collect (struct cpuid_collected_data *ccd) ++{ ++ if (!_dl_diagnostics_cpuid_collect_1 (ccd, false)) ++ _dl_diagnostics_cpuid_collect_1 (ccd, true); ++ ++ /* Re-use the result of the official feature probing here. */ ++ const struct cpu_features *cpu_features = __get_cpu_features (); ++ if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) ++ { ++ unsigned int xcrlow; ++ unsigned int xcrhigh; ++ asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); ++ ccd->xgetbv_ecx_0 = ((uint64_t) xcrhigh << 32) + xcrlow; ++ } ++ else ++ ccd->xgetbv_ecx_0 = 0; ++} ++ ++/* Print a CPUID register value (passed as REG_VALUE) if it differs ++ from the expected REG_REFERENCE value. PROCESSOR_INDEX is the ++ process sequence number (always starting at zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_print_reg (unsigned int processor_index, ++ const struct cpuid_query *q, ++ const char *reg_label, unsigned int reg_value, ++ bool subleaf) ++{ ++ if (subleaf) ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].%s=0x%x\n", ++ processor_index, q->eax, q->ecx_first, reg_label, reg_value); ++ else ++ _dl_printf ("x86.processor[0x%x].cpuid.eax[0x%x].%s=0x%x\n", ++ processor_index, q->eax, reg_label, reg_value); ++} ++ ++/* Print CPUID result values in *RESULT for the query in ++ CCD->qr[CCD_IDX]. PROCESSOR_INDEX is the process sequence number ++ (always starting at zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_print_query (unsigned int processor_index, ++ struct cpuid_collected_data *ccd, ++ unsigned int ccd_idx, ++ const struct cpuid_registers *result) ++{ ++ /* Treat this as a value if subleaves if ecx isn't zero (maybe ++ within the [ecx_fist, ecx_last] range), or if eax matches its ++ neighbors. If the range is [0, ecx_limit], then the subleaves ++ are not distinct (independently of ecx_preserves_query_byte), ++ so do not report them separately. 
*/ ++ struct cpuid_query *q = &ccd->qr[ccd_idx].q; ++ bool subleaf = (q->ecx_first > 0 ++ || (q->ecx_first != q->ecx_last ++ && !(q->ecx_first == 0 && q->ecx_last == ccd->ecx_limit)) ++ || (ccd_idx > 0 && q->eax == ccd->qr[ccd_idx - 1].q.eax) ++ || (ccd_idx + 1 < ccd->used ++ && q->eax == ccd->qr[ccd_idx + 1].q.eax)); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "eax", result->eax, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "ebx", result->ebx, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "ecx", result->ecx, ++ subleaf); ++ _dl_diagnostics_cpuid_print_reg (processor_index, q, "edx", result->edx, ++ subleaf); ++ ++ if (subleaf && q->ecx_first != q->ecx_last) ++ { ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].until_ecx=0x%x\n", ++ processor_index, q->eax, q->ecx_first, q->ecx_last); ++ if (q->ecx_preserves_query_byte) ++ _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]" ++ ".ecx[0x%x].ecx_query_mask=0xff\n", ++ processor_index, q->eax, q->ecx_first); ++ } ++} ++ ++/* Perform differential reporting of the data in *CURRENT against ++ *BASE. REQUESTED_CPU is the kernel CPU ID the thread was ++ configured to run on, or -1 if no configuration was possible. ++ PROCESSOR_INDEX is the process sequence number (always starting at ++ zero; not a kernel ID). */ ++static void ++_dl_diagnostics_cpuid_report (struct dl_iterate_cpu *dci, ++ struct cpuid_collected_data *current, ++ struct cpuid_collected_data *base) ++{ ++ if (dci->requested_cpu >= 0) ++ _dl_printf ("x86.processor[0x%x].requested=0x%x\n", ++ dci->processor_index, dci->requested_cpu); ++ if (dci->actual_cpu >= 0) ++ _dl_printf ("x86.processor[0x%x].observed=0x%x\n", ++ dci->processor_index, dci->actual_cpu); ++ if (dci->actual_node >= 0) ++ _dl_printf ("x86.processor[0x%x].observed_node=0x%x\n", ++ dci->processor_index, dci->actual_node); ++ ++ _dl_printf ("x86.processor[0x%x].cpuid_leaves=0x%x\n", ++ dci->processor_index, current->used); ++ _dl_printf ("x86.processor[0x%x].ecx_limit=0x%x\n", ++ dci->processor_index, current->ecx_limit); ++ ++ unsigned int base_idx = 0; ++ for (unsigned int current_idx = 0; current_idx < current->used; ++ ++current_idx) ++ { ++ /* Report missing data on the current CPU as 0. */ ++ unsigned long long int current_query ++ = cpuid_query_combined (¤t->qr[current_idx].q); ++ while (base_idx < base->used ++ && cpuid_query_combined (&base->qr[base_idx].q) < current_query) ++ { ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ base, base_idx, ++ &cpuid_registers_zero); ++ ++base_idx; ++ } ++ ++ if (base_idx < base->used ++ && cpuid_query_combined (&base->qr[base_idx].q) == current_query) ++ { ++ _Static_assert (sizeof (struct cpuid_registers) == 4 * 4, ++ "no padding in struct cpuid_registers"); ++ if (current->qr[current_idx].q.ecx_last ++ != base->qr[base_idx].q.ecx_last ++ || memcmp (¤t->qr[current_idx].r, ++ &base->qr[base_idx].r, ++ sizeof (struct cpuid_registers)) != 0) ++ /* The ECX range or the values have changed. Show the ++ new values. */ ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ current, current_idx, ++ ¤t->qr[current_idx].r); ++ ++base_idx; ++ } ++ else ++ /* Data is absent in the base reference. Report the new data. */ ++ _dl_diagnostics_cpuid_print_query (dci->processor_index, ++ current, current_idx, ++ ¤t->qr[current_idx].r); ++ } ++ ++ if (current->xgetbv_ecx_0 != base->xgetbv_ecx_0) ++ { ++ /* Re-use the 64-bit printing routine. 
*/ ++ _dl_printf ("x86.processor[0x%x].", dci->processor_index); ++ _dl_diagnostics_print_labeled_value ("xgetbv.ecx[0x0]", ++ current->xgetbv_ecx_0); ++ } ++} ++ ++static void ++_dl_diagnostics_cpuid (void) ++{ ++#if !HAS_CPUID ++ /* CPUID is not supported, so there is nothing to dump. */ ++ if (__get_cpuid_max (0, 0) == 0) ++ return; ++#endif ++ ++ struct dl_iterate_cpu dic; ++ _dl_iterate_cpu_init (&dic); ++ ++ /* Two copies of the data are used. Data is written to the index ++ (dic.processor_index & 1). The previous version against which the ++ data dump is reported is at index !(processor_index & 1). */ ++ struct cpuid_collected_data ccd[2]; ++ ++ /* The initial data is presumed to be all zero. Zero results are ++ not recorded. */ ++ ccd[1].used = 0; ++ ccd[1].xgetbv_ecx_0 = 0; ++ ++ /* Run the CPUID probing on a specific CPU. There are expected ++ differences for encoding core IDs and topology information in ++ CPUID output, but some firmware/kernel bugs also may result in ++ asymmetric data across CPUs in some cases. */ ++ while (_dl_iterate_cpu_next (&dic)) ++ { ++ _dl_diagnostics_cpuid_collect (&ccd[dic.processor_index & 1]); ++ _dl_diagnostics_cpuid_report ++ (&dic, &ccd[dic.processor_index & 1], ++ &ccd[!(dic.processor_index & 1)]); ++ } + } diff --git a/glibc-RHEL-22165-5.patch b/glibc-RHEL-22165-5.patch new file mode 100644 index 0000000..76b42ea --- /dev/null +++ b/glibc-RHEL-22165-5.patch @@ -0,0 +1,262 @@ +commit 5653ccd847f0cd3a98906e44c97c71d68652d326 +Author: Florian Weimer +Date: Mon Apr 8 16:48:55 2024 +0200 + + elf: Add CPU iteration support for future use in ld.so diagnostics + + Reviewed-by: Szabolcs Nagy + +diff --git a/elf/dl-iterate_cpu.h b/elf/dl-iterate_cpu.h +new file mode 100644 +index 0000000000..60db167b13 +--- /dev/null ++++ b/elf/dl-iterate_cpu.h +@@ -0,0 +1,136 @@ ++/* Iterate over all CPUs, for CPU-specific diagnostics. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef DL_ITERATE_CPU_H ++#define DL_ITERATE_CPU_H ++ ++#include ++#include ++ ++struct dl_iterate_cpu ++{ ++ /* Sequential iteration count, starting at 0. */ ++ unsigned int processor_index; ++ ++ /* Requested CPU. Can be -1 if affinity could not be set. */ ++ int requested_cpu; ++ ++ /* Observed current CPU. -1 if unavailable. */ ++ int actual_cpu; ++ ++ /* Observed node ID for the CPU. -1 if unavailable. */ ++ int actual_node; ++ ++ /* Internal fields to implement the iteration. */ ++ ++ /* Affinity as obtained by _dl_iterate_cpu_init, using ++ _dl_getaffinity. Space for 8,192 CPUs. */ ++ unsigned long int mask_reference[8192 / sizeof (unsigned long int) / 8]; ++ ++ /* This array is used by _dl_setaffinity calls. */ ++ unsigned long int mask_request[8192 / sizeof (unsigned long int) / 8]; ++ ++ /* Return value from the initial _dl_getaffinity call. 
*/ ++ int length_reference; ++}; ++ ++static void ++_dl_iterate_cpu_init (struct dl_iterate_cpu *dic) ++{ ++ dic->length_reference ++ = _dl_getaffinity (dic->mask_reference, sizeof (dic->mask_reference)); ++ /* Prepare for the first _dl_iterate_cpu_next call. */ ++ dic->processor_index = -1; ++ dic->requested_cpu = -1; ++} ++ ++static bool ++_dl_iterate_cpu_next (struct dl_iterate_cpu *dic) ++{ ++ ++dic->processor_index; ++ ++ if (dic->length_reference > 0) ++ { ++ /* Search for the next CPU to switch to. */ ++ while (true) ++ { ++ ++dic->requested_cpu; ++ ++ /* Array index and bit number within the array. */ ++ unsigned int long_index ++ = dic->requested_cpu / sizeof (unsigned long int) / 8; ++ unsigned int bit_index ++ = dic->requested_cpu % (sizeof (unsigned long int) * 8); ++ ++ if (long_index * sizeof (unsigned long int) >= dic->length_reference) ++ /* All possible CPUs have been covered. */ ++ return false; ++ ++ unsigned long int bit = 1UL << bit_index; ++ if (dic->mask_reference[long_index] & bit) ++ { ++ /* The CPU is available. Try to select it. */ ++ dic->mask_request[long_index] = bit; ++ if (_dl_setaffinity (dic->mask_request, ++ (long_index + 1) ++ * sizeof (unsigned long int)) < 0) ++ { ++ /* Record that we could not perform a CPU request. */ ++ dic->length_reference = -1; ++ ++ if (dic->processor_index > 0) ++ /* We already reported something. There is no need to ++ continue because the new data is probably not useful. */ ++ return false; ++ } ++ ++ /* Clear the bit in case the next iteration switches to the ++ next long value. */ ++ dic->mask_request[long_index] = 0; ++ ++ /* We found a CPU to run on. */ ++ break; ++ } ++ } ++ } ++ else ++ { ++ /* No way to set CPU affinity. Iterate just once. */ ++ if (dic->processor_index > 0) ++ return false; ++ } ++ ++ /* Fill in the actual CPU information. CPU pinning may not actually ++ be effective, depending on the container host. */ ++ unsigned int cpu, node; ++ if (_dl_getcpu (&cpu, &node) < 0) ++ { ++ /* No CPU information available. */ ++ dic->actual_cpu = -1; ++ dic->actual_node = -1; ++ } ++ else ++ { ++ dic->actual_cpu = cpu; ++ dic->actual_node = node; ++ } ++ ++ return true; ++} ++ ++#endif /* DL_ITERATE_CPU_H */ +diff --git a/sysdeps/generic/dl-affinity.h b/sysdeps/generic/dl-affinity.h +new file mode 100644 +index 0000000000..d117f737e9 +--- /dev/null ++++ b/sysdeps/generic/dl-affinity.h +@@ -0,0 +1,54 @@ ++/* CPU affinity handling for the dynamic linker. Stub version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef DL_AFFINITY_H ++#define DL_AFFINITY_H ++ ++#include ++#include ++ ++/* On success, write the current CPU ID to *CPU, and the current node ++ ID to *NODE, and return 0. Return a negative error code on ++ failure. 
*/ ++static inline int ++_dl_getcpu (unsigned int *cpu, unsigned int *node) ++{ ++ return -ENOSYS; ++} ++ ++/* On success, write CPU ID affinity bits for the current thread to ++ *BITS, which must be SIZE bytes long, and return the number of ++ bytes updated, a multiple of sizeof (unsigned long int). On ++ failure, return a negative error code. */ ++static int ++_dl_getaffinity (unsigned long int *bits, size_t size) ++{ ++ return -ENOSYS; ++} ++ ++/* Set the CPU affinity mask for the current thread to *BITS, using ++ the SIZE bytes from that array, which should be a multiple of ++ sizeof (unsigned long int). Return 0 on success, and a negative ++ error code on failure. */ ++static int ++_dl_setaffinity (const unsigned long int *bits, size_t size) ++{ ++ return -ENOSYS; ++} ++ ++#endif /* DL_AFFINITY_H */ +diff --git a/sysdeps/unix/sysv/linux/dl-affinity.h b/sysdeps/unix/sysv/linux/dl-affinity.h +new file mode 100644 +index 0000000000..bbfede7750 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/dl-affinity.h +@@ -0,0 +1,46 @@ ++/* CPU affinity handling for the dynamic linker. Linux version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* See sysdeps/generic/dl-affinity.h for documentation of these interfaces. */ ++ ++#ifndef DL_AFFINITY_H ++#define DL_AFFINITY_H ++ ++#include ++#include ++#include ++ ++static inline int ++_dl_getcpu (unsigned int *cpu, unsigned int *node) ++{ ++ return INTERNAL_SYSCALL_CALL (getcpu, cpu, node); ++} ++ ++static int ++_dl_getaffinity (unsigned long int *bits, size_t size) ++{ ++ return INTERNAL_SYSCALL_CALL (sched_getaffinity, /* TID */ 0, size, bits); ++} ++ ++static int ++_dl_setaffinity (const unsigned long int *bits, size_t size) ++{ ++ return INTERNAL_SYSCALL_CALL (sched_setaffinity, /* TID */ 0, size, bits); ++} ++ ++#endif /* DL_AFFINITY_H */ diff --git a/glibc.spec b/glibc.spec index 8ebbf50..28e00a9 100644 --- a/glibc.spec +++ b/glibc.spec @@ -155,7 +155,7 @@ end \ Summary: The GNU libc libraries Name: glibc Version: %{glibcversion} -Release: 107%{?dist} +Release: 108%{?dist} # In general, GPLv2+ is used by programs, LGPLv2+ is used for # libraries. 
@@ -820,6 +820,11 @@ Patch583: glibc-RHEL-25046.patch Patch584: glibc-RHEL-32681-1.patch Patch585: glibc-RHEL-32681-2.patch Patch586: glibc-RHEL-39006.patch +Patch587: glibc-RHEL-22165-1.patch +Patch588: glibc-RHEL-22165-2.patch +Patch589: glibc-RHEL-22165-3.patch +Patch590: glibc-RHEL-22165-4.patch +Patch591: glibc-RHEL-22165-5.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2978,6 +2983,10 @@ update_gconv_modules_cache () %endif %changelog +* Thu Jun 06 2024 Patsy Griffin - 2.34-108 +- aarch64: enhance ld.so --list-diagnostics on aarch64 similar + to x86_64 (RHEL-22165) + * Tue May 28 2024 - 2.34-107 - Add MMAP_ABOVE4G from Linux 6.6 to sys/mman.h (RHEL-39006)
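
As a usage illustration of the line grammar documented in glibc-RHEL-22165-1.patch above, a minimal consumer of the --list-diagnostics output might look like the sketch below. This is an editor's sketch, not part of the patches or of glibc: it assumes the diagnostics output is piped in on stdin (for example, ld.so --list-diagnostics | ./parse-diagnostics), and the program and the helper name print_string_value are hypothetical. It only follows the documented "subscript(.subscript)*=value" form: numbers are always hexadecimal with a 0x prefix, and quoted strings use \\, \" and three-digit octal escapes, which is all the decoder handles.

/* Editor's illustrative sketch: split each diagnostics line into its
   key (the dotted, possibly indexed subscripts) and its value, then
   decode the value according to the grammar in the manual patch.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print a quoted string value, expanding the escape sequences the
   manual documents: \\, \" and three-digit octal escapes.  */
static void
print_string_value (const char *s)
{
  /* Skip the opening quote; stop at the closing quote.  */
  for (++s; *s != '\0' && *s != '"'; ++s)
    {
      if (*s == '\\')
        {
          ++s;
          if (*s == '\\' || *s == '"')
            putchar (*s);
          else
            {
              /* Three-digit octal escape.  */
              unsigned int c = 0;
              for (int i = 0; i < 3 && *s >= '0' && *s <= '7'; ++i, ++s)
                c = c * 8 + (*s - '0');
              --s;
              putchar (c);
            }
        }
      else
        putchar (*s);
    }
  putchar ('\n');
}

int
main (void)
{
  char line[8192];
  while (fgets (line, sizeof line, stdin) != NULL)
    {
      /* Labels and indices cannot contain '=', so the first '='
         separates the key from the value.  */
      char *eq = strchr (line, '=');
      if (eq == NULL)
        continue;
      *eq = '\0';
      const char *key = line;
      const char *value = eq + 1;

      if (value[0] == '"')
        {
          printf ("%s (string): ", key);
          print_string_value (value);
        }
      else
        /* All integers fit into the 64-bit unsigned range.  */
        printf ("%s (integer): 0x%llx\n", key,
                strtoull (value, NULL, 16));
    }
  return 0;
}

Because subscript indices are not guaranteed to be contiguous (as the manual patch notes), a real consumer would store the parsed keys in an associative container rather than a flat array; the sketch simply echoes each key/value pair.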