import glibc-2.28-180.el8

Commit 33ddd02b4d (parent e885cc3798)
CentOS Sources 2021-12-24 04:19:18 +00:00, committed by Stepan Oksanichenko
38 changed files with 6232 additions and 2 deletions


@@ -0,0 +1,38 @@
This patch is a downstream-only variant of this upstream commit:
commit 45b1e17e9150dbd9ac2d578579063fbfa8e1b327
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Dec 17 10:03:05 2020 +0000
aarch64: use PTR_ARG and SIZE_ARG instead of DELOUSE
DELOUSE was added to asm code to make them compatible with non-LP64
ABIs, but it is an unfortunate name and the code was not compatible
with ABIs where pointer and size_t are different. Glibc currently
only supports the LP64 ABI so these macros are not really needed or
tested, but for now the name is changed to be more meaningful instead
of removing them completely.
Some DELOUSE macros were dropped: clone, strlen and strnlen used it
unnecessarily.
The out of tree ILP32 patches are currently not maintained and will
likely need a rework to rebase them on top of the time64 changes.
Keeping the DELOUSE macro avoids the need to update all string
functions. Lack of BTI markers and architecture variants cause many
conflicts in a full upstream backport.
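For orientation, a minimal sketch of the macro definitions involved (the LP64 branch is reconstructed here as an assumption; the hunk below only shows the non-LP64 branch and the new names):

#ifdef __LP64__
# define PTR_LOG_SIZE 3
# define DELOUSE(n)                   /* LP64: pointers are already 64-bit */
#else
# define PTR_LOG_SIZE 2
# define DELOUSE(n) mov w##n, w##n    /* zero-extend a 32-bit argument */
#endif
#define PTR_ARG(n)  DELOUSE (n)
#define SIZE_ARG(n) DELOUSE (n)

An assembly entry point then annotates which incoming registers hold pointers or sizes, e.g. PTR_ARG (0) and SIZE_ARG (2) at the top of memcpy, as the A64FX patches later in this import do.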
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index 5b30709436d3acea..1bcf15d4f18586ba 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -32,6 +32,8 @@
# define PTR_LOG_SIZE 2
# define DELOUSE(n) mov w##n, w##n
#endif
+#define PTR_ARG(n) DELOUSE(n)
+#define SIZE_ARG(n) DELOUSE(n)
#define PTR_SIZE (1<<PTR_LOG_SIZE)


@@ -0,0 +1,88 @@
commit 77d175e14e5f4cf24e9579c03eef5d006a286316
Author: Naohiro Tamura <naohirot@jp.fujitsu.com>
Date: Wed May 12 09:26:40 2021 +0000
config: Added HAVE_AARCH64_SVE_ASM for aarch64
This patch checks if assembler supports '-march=armv8.2-a+sve' to
generate SVE code or not, and then define HAVE_AARCH64_SVE_ASM macro.
Conflicts:
config.h.in
(missing PAC+BTI support downstream, missing other ports)
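The probe amounts to asking whether the assembler accepts a single SVE instruction. A rough stand-alone C equivalent (a sketch only; the real check, shown below, assembles a bare conftest.s with ${CC-cc} -c -march=armv8.2-a+sve):

/* Compile with: cc -c sve-probe.c
   An assembler without SVE support rejects the ptrue instruction and
   compilation fails; configure turns exactly that signal into the
   HAVE_AARCH64_SVE_ASM define.  */
int
sve_probe (void)
{
  __asm__ (".arch armv8.2-a+sve\n\tptrue p0.b");
  return 0;
}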
diff --git a/config.h.in b/config.h.in
index 8520b0fa8d4668fb..94d5ea367e10f849 100644
--- a/config.h.in
+++ b/config.h.in
@@ -112,6 +112,11 @@
/* AArch64 big endian ABI */
#undef HAVE_AARCH64_BE
+/* Assembler support ARMv8.2-A SVE.
+ This macro becomes obsolete when glibc increased the minimum
+ required version of GNU 'binutils' to 2.28 or later. */
+#define HAVE_AARCH64_SVE_ASM 0
+
/* RISC-V integer ABI for ld.so. */
#undef RISCV_ABI_XLEN
diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
index f78a79338aba1e34..9fb713155d4ee6d8 100644
--- a/sysdeps/aarch64/configure
+++ b/sysdeps/aarch64/configure
@@ -212,3 +212,31 @@ fi
$as_echo "$libc_cv_aarch64_variant_pcs" >&6; }
config_vars="$config_vars
aarch64-variant-pcs = $libc_cv_aarch64_variant_pcs"
+
+# Check if asm support armv8.2-a+sve
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SVE support in assembler" >&5
+$as_echo_n "checking for SVE support in assembler... " >&6; }
+if ${libc_cv_asm_sve+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.s <<\EOF
+ ptrue p0.b
+EOF
+if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ libc_cv_aarch64_sve_asm=yes
+else
+ libc_cv_aarch64_sve_asm=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_sve" >&5
+$as_echo "$libc_cv_asm_sve" >&6; }
+if test $libc_cv_aarch64_sve_asm = yes; then
+ $as_echo "#define HAVE_AARCH64_SVE_ASM 1" >>confdefs.h
+
+fi
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
index 7f13bfb93b60bfd7..0236cfcdf3c8d10d 100644
--- a/sysdeps/aarch64/configure.ac
+++ b/sysdeps/aarch64/configure.ac
@@ -42,3 +42,18 @@ EOF
fi
rm -rf conftest.*])
LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs])
+
+# Check if asm support armv8.2-a+sve
+AC_CACHE_CHECK(for SVE support in assembler, libc_cv_asm_sve, [dnl
+cat > conftest.s <<\EOF
+ ptrue p0.b
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ libc_cv_aarch64_sve_asm=yes
+else
+ libc_cv_aarch64_sve_asm=no
+fi
+rm -f conftest*])
+if test $libc_cv_aarch64_sve_asm = yes; then
+ AC_DEFINE(HAVE_AARCH64_SVE_ASM)
+fi


@@ -0,0 +1,140 @@
commit 38560563587ad8eafa700c56800ff844f18fbad1
Author: Naohiro Tamura <naohirot@fujitsu.com>
Date: Thu May 20 07:34:37 2021 +0000
aarch64: Added Vector Length Set test helper script
This patch is a test helper script to change Vector Length for child
process. This script can be used as test-wrapper for 'make check'.
Usage examples:
~/build$ make check subdirs=string \
test-wrapper='~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16'
~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16 \
make test t=string/test-memcpy
~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 32 \
./debugglibc.sh string/test-memmove
~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 64 \
./testrun.sh string/test-memset
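The script is a thin wrapper around the Linux prctl interface; a minimal C sketch of the same operation (constants copied from the script below, assuming Linux on AArch64):

#include <stdio.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <unistd.h>

#define HWCAP_SVE            (1UL << 22)
#define PR_SVE_SET_VL        50
#define PR_SVE_SET_VL_ONEXEC (1 << 18)
#define PR_SVE_VL_INHERIT    (1 << 17)

int
main (int argc, char **argv)
{
  if (argc < 2)
    return 1;
  if (!(getauxval (AT_HWCAP) & HWCAP_SVE))
    return 77;                  /* EXIT_UNSUPPORTED, as in the script */
  /* Arrange for a 16-byte vector length at the next exec and let
     child processes inherit it.  */
  if (prctl (PR_SVE_SET_VL, 16 | PR_SVE_SET_VL_ONEXEC | PR_SVE_VL_INHERIT) < 0)
    return 1;
  execvp (argv[1], argv + 1);   /* run the wrapped test command */
  perror ("execvp");
  return 1;
}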
diff --git a/INSTALL b/INSTALL
index 065565093bd76d5b..b3a4370f592c5047 100644
--- a/INSTALL
+++ b/INSTALL
@@ -387,6 +387,10 @@ the same syntax as 'test-wrapper-env', the only difference in its
semantics being starting with an empty set of environment variables
rather than the ambient set.
+ For AArch64 with SVE, when testing the GNU C Library, 'test-wrapper'
+may be set to "SRCDIR/sysdeps/unix/sysv/linux/aarch64/vltest.py
+VECTOR-LENGTH" to change Vector Length.
+
Installing the C Library
========================
diff --git a/manual/install.texi b/manual/install.texi
index 7e9f2be150e6f98a..c262fd56d0cef67b 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -425,6 +425,9 @@ use has the same syntax as @samp{test-wrapper-env}, the only
difference in its semantics being starting with an empty set of
environment variables rather than the ambient set.
+For AArch64 with SVE, when testing @theglibc{}, @samp{test-wrapper}
+may be set to "@var{srcdir}/sysdeps/unix/sysv/linux/aarch64/vltest.py
+@var{vector-length}" to change Vector Length.
@node Running make install
@appendixsec Installing the C Library
diff --git a/sysdeps/unix/sysv/linux/aarch64/vltest.py b/sysdeps/unix/sysv/linux/aarch64/vltest.py
new file mode 100755
index 0000000000000000..bed62ad151e06868
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/vltest.py
@@ -0,0 +1,82 @@
+#!/usr/bin/python3
+# Set Scalable Vector Length test helper
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+"""Set Scalable Vector Length test helper.
+
+Set Scalable Vector Length for child process.
+
+examples:
+
+~/build$ make check subdirs=string \
+test-wrapper='~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16'
+
+~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 16 \
+make test t=string/test-memcpy
+
+~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 32 \
+./debugglibc.sh string/test-memmove
+
+~/build$ ~/glibc/sysdeps/unix/sysv/linux/aarch64/vltest.py 64 \
+./testrun.sh string/test-memset
+"""
+import argparse
+from ctypes import cdll, CDLL
+import os
+import sys
+
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+EXIT_UNSUPPORTED = 77
+
+AT_HWCAP = 16
+HWCAP_SVE = (1 << 22)
+
+PR_SVE_GET_VL = 51
+PR_SVE_SET_VL = 50
+PR_SVE_SET_VL_ONEXEC = (1 << 18)
+PR_SVE_VL_INHERIT = (1 << 17)
+PR_SVE_VL_LEN_MASK = 0xffff
+
+def main(args):
+ libc = CDLL("libc.so.6")
+ if not libc.getauxval(AT_HWCAP) & HWCAP_SVE:
+ print("CPU doesn't support SVE")
+ sys.exit(EXIT_UNSUPPORTED)
+
+ libc.prctl(PR_SVE_SET_VL,
+ args.vl[0] | PR_SVE_SET_VL_ONEXEC | PR_SVE_VL_INHERIT)
+ os.execvp(args.args[0], args.args)
+ print("exec system call failure")
+ sys.exit(EXIT_FAILURE)
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description=
+ "Set Scalable Vector Length test helper",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+ # positional argument
+ parser.add_argument("vl", nargs=1, type=int,
+ choices=range(16, 257, 16),
+ help=('vector length '\
+ 'which is multiples of 16 from 16 to 256'))
+ # remainder arguments
+ parser.add_argument('args', nargs=argparse.REMAINDER,
+ help=('args '\
+ 'which is passed to child process'))
+ args = parser.parse_args()
+ main(args)


@@ -0,0 +1,623 @@
commit fa527f345cbbe852ec085932fbea979956c195b5
Author: Naohiro Tamura <naohirot@jp.fujitsu.com>
Date: Thu May 27 07:42:35 2021 +0000
aarch64: Added optimized memcpy and memmove for A64FX
This patch optimizes the performance of memcpy/memmove for A64FX [1]
which implements ARMv8-A SVE and has L1 64KB cache per core and L2 8MB
cache per NUMA node.
The performance optimization makes use of Scalable Vector Register
with several techniques such as loop unrolling, memory access
alignment, cache zero fill, and software pipelining.
SVE assembler code for memcpy/memmove is implemented as Vector Length
Agnostic code so theoretically it can be run on any SOC which supports
ARMv8-A SVE standard.
We confirmed that all testcases have been passed by running 'make
check' and 'make xcheck' not only on A64FX but also on ThunderX2.
And also we confirmed that the SVE 512 bit vector register performance
is roughly 4 times better than Advanced SIMD 128 bit register and 8
times better than scalar 64 bit register by running 'make bench'.
[1] https://github.com/fujitsu/A64FX
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Reviewed-by: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Conflicts:
manual/tunables.texi
sysdeps/aarch64/multiarch/Makefile
sysdeps/aarch64/multiarch/ifunc-impl-list.c
sysdeps/aarch64/multiarch/init-arch.h
sysdeps/aarch64/multiarch/memcpy.c
sysdeps/aarch64/multiarch/memmove.c
sysdeps/unix/sysv/linux/aarch64/cpu-features.c
sysdeps/unix/sysv/linux/aarch64/cpu-features.h
(all conflicts due to missing optimizations for other CPUs)
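The Vector Length Agnostic property is easiest to see in SVE's predication: the same binary adapts to any hardware vector width because each iteration asks the CPU how many bytes its predicate covers. A sketch of just that core idea using ACLE intrinsics (the patch itself is hand-written assembly; the unrolling, alignment, and zero-fill techniques named above are deliberately omitted):

#include <arm_sve.h>   /* needs a compiler with SVE support */
#include <stddef.h>
#include <stdint.h>

void
vla_copy (uint8_t *dst, const uint8_t *src, size_t n)
{
  /* svcntb () is the vector length in bytes (64 on A64FX); the
     while-less-than predicate masks off the tail automatically.  */
  for (size_t i = 0; i < n; i += svcntb ())
    {
      svbool_t pg = svwhilelt_b8_u64 (i, n);
      svst1_u8 (pg, dst + i, svld1_u8 (pg, src + i));
    }
}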
diff --git a/manual/tunables.texi b/manual/tunables.texi
index bd737b5d57080462..07887981748bc44b 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -386,7 +386,7 @@ This tunable is specific to powerpc, powerpc64 and powerpc64le.
The @code{glibc.cpu.name=xxx} tunable allows the user to tell @theglibc{} to
assume that the CPU is @code{xxx} where xxx may have one of these values:
@code{generic}, @code{falkor}, @code{thunderxt88}, @code{thunderx2t99},
-@code{thunderx2t99p1}.
+@code{thunderx2t99p1}, @code{a64fx}.
This tunable is specific to aarch64.
@end deftp
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index 57ffdf72382c0a44..5a19ba0308e80983 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,4 +1,5 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
- memcpy_falkor memmove_falkor memset_generic memset_falkor
+ memcpy_falkor memcpy_a64fx \
+ memmove_falkor memset_generic memset_falkor
endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index e55be80103b948a2..f53db12acce37877 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -25,7 +25,7 @@
#include <stdio.h>
/* Maximum number of IFUNC implementations. */
-#define MAX_IFUNC 4
+#define MAX_IFUNC 7
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -42,10 +42,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor)
+#if HAVE_AARCH64_SVE_ASM
+ IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx)
+#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
IFUNC_IMPL (i, name, memmove,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor)
+#if HAVE_AARCH64_SVE_ASM
+ IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx)
+#endif
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
IFUNC_IMPL (i, name, memset,
/* Enable this on non-falkor processors too so that other cores
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index d1e5703cb25fdcff..65dc8f82ff23c754 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -22,4 +22,6 @@
uint64_t __attribute__((unused)) midr = \
GLRO(dl_aarch64_cpu_features).midr_el1; \
unsigned __attribute__((unused)) zva_size = \
- GLRO(dl_aarch64_cpu_features).zva_size;
+ GLRO(dl_aarch64_cpu_features).zva_size; \
+ bool __attribute__((unused)) sve = \
+ GLRO(dl_aarch64_cpu_features).sve;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index 4a04a63b0fe0c84b..e0313c42e82a7b86 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -32,6 +32,9 @@ extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
+# if HAVE_AARCH64_SVE_ASM
+extern __typeof (__redirect_memcpy) __memcpy_a64fx attribute_hidden;
+# endif
libc_ifunc (__libc_memcpy,
(IS_THUNDERX (midr)
@@ -40,8 +43,13 @@ libc_ifunc (__libc_memcpy,
? __memcpy_falkor
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
? __memcpy_thunderx2
+# if HAVE_AARCH64_SVE_ASM
+ : (IS_A64FX (midr)
+ ? __memcpy_a64fx
+ : __memcpy_generic)))));
+# else
: __memcpy_generic))));
-
+# endif
# undef memcpy
strong_alias (__libc_memcpy, memcpy);
#endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
new file mode 100644
index 0000000000000000..65528405bb123737
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -0,0 +1,406 @@
+/* Optimized memcpy for Fujitsu A64FX processor.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8.2-a, AArch64, unaligned accesses, sve
+ *
+ */
+
+#define L2_SIZE (8*1024*1024)/2 // L2 8MB/2
+#define CACHE_LINE_SIZE 256
+#define ZF_DIST (CACHE_LINE_SIZE * 21) // Zerofill distance
+#define dest x0
+#define src x1
+#define n x2 // size
+#define tmp1 x3
+#define tmp2 x4
+#define tmp3 x5
+#define rest x6
+#define dest_ptr x7
+#define src_ptr x8
+#define vector_length x9
+#define cl_remainder x10 // CACHE_LINE_SIZE remainder
+
+#if HAVE_AARCH64_SVE_ASM
+# if IS_IN (libc)
+# define MEMCPY __memcpy_a64fx
+# define MEMMOVE __memmove_a64fx
+
+ .arch armv8.2-a+sve
+
+ .macro dc_zva times
+ dc zva, tmp1
+ add tmp1, tmp1, CACHE_LINE_SIZE
+ .if \times-1
+ dc_zva "(\times-1)"
+ .endif
+ .endm
+
+ .macro ld1b_unroll8
+ ld1b z0.b, p0/z, [src_ptr, #0, mul vl]
+ ld1b z1.b, p0/z, [src_ptr, #1, mul vl]
+ ld1b z2.b, p0/z, [src_ptr, #2, mul vl]
+ ld1b z3.b, p0/z, [src_ptr, #3, mul vl]
+ ld1b z4.b, p0/z, [src_ptr, #4, mul vl]
+ ld1b z5.b, p0/z, [src_ptr, #5, mul vl]
+ ld1b z6.b, p0/z, [src_ptr, #6, mul vl]
+ ld1b z7.b, p0/z, [src_ptr, #7, mul vl]
+ .endm
+
+ .macro stld1b_unroll4a
+ st1b z0.b, p0, [dest_ptr, #0, mul vl]
+ st1b z1.b, p0, [dest_ptr, #1, mul vl]
+ ld1b z0.b, p0/z, [src_ptr, #0, mul vl]
+ ld1b z1.b, p0/z, [src_ptr, #1, mul vl]
+ st1b z2.b, p0, [dest_ptr, #2, mul vl]
+ st1b z3.b, p0, [dest_ptr, #3, mul vl]
+ ld1b z2.b, p0/z, [src_ptr, #2, mul vl]
+ ld1b z3.b, p0/z, [src_ptr, #3, mul vl]
+ .endm
+
+ .macro stld1b_unroll4b
+ st1b z4.b, p0, [dest_ptr, #4, mul vl]
+ st1b z5.b, p0, [dest_ptr, #5, mul vl]
+ ld1b z4.b, p0/z, [src_ptr, #4, mul vl]
+ ld1b z5.b, p0/z, [src_ptr, #5, mul vl]
+ st1b z6.b, p0, [dest_ptr, #6, mul vl]
+ st1b z7.b, p0, [dest_ptr, #7, mul vl]
+ ld1b z6.b, p0/z, [src_ptr, #6, mul vl]
+ ld1b z7.b, p0/z, [src_ptr, #7, mul vl]
+ .endm
+
+ .macro stld1b_unroll8
+ stld1b_unroll4a
+ stld1b_unroll4b
+ .endm
+
+ .macro st1b_unroll8
+ st1b z0.b, p0, [dest_ptr, #0, mul vl]
+ st1b z1.b, p0, [dest_ptr, #1, mul vl]
+ st1b z2.b, p0, [dest_ptr, #2, mul vl]
+ st1b z3.b, p0, [dest_ptr, #3, mul vl]
+ st1b z4.b, p0, [dest_ptr, #4, mul vl]
+ st1b z5.b, p0, [dest_ptr, #5, mul vl]
+ st1b z6.b, p0, [dest_ptr, #6, mul vl]
+ st1b z7.b, p0, [dest_ptr, #7, mul vl]
+ .endm
+
+ .macro shortcut_for_small_size exit
+ // if rest <= vector_length * 2
+ whilelo p0.b, xzr, n
+ whilelo p1.b, vector_length, n
+ b.last 1f
+ ld1b z0.b, p0/z, [src, #0, mul vl]
+ ld1b z1.b, p1/z, [src, #1, mul vl]
+ st1b z0.b, p0, [dest, #0, mul vl]
+ st1b z1.b, p1, [dest, #1, mul vl]
+ ret
+1: // if rest > vector_length * 8
+ cmp n, vector_length, lsl 3 // vector_length * 8
+ b.hi \exit
+ // if rest <= vector_length * 4
+ lsl tmp1, vector_length, 1 // vector_length * 2
+ whilelo p2.b, tmp1, n
+ incb tmp1
+ whilelo p3.b, tmp1, n
+ b.last 1f
+ ld1b z0.b, p0/z, [src, #0, mul vl]
+ ld1b z1.b, p1/z, [src, #1, mul vl]
+ ld1b z2.b, p2/z, [src, #2, mul vl]
+ ld1b z3.b, p3/z, [src, #3, mul vl]
+ st1b z0.b, p0, [dest, #0, mul vl]
+ st1b z1.b, p1, [dest, #1, mul vl]
+ st1b z2.b, p2, [dest, #2, mul vl]
+ st1b z3.b, p3, [dest, #3, mul vl]
+ ret
+1: // if rest <= vector_length * 8
+ lsl tmp1, vector_length, 2 // vector_length * 4
+ whilelo p4.b, tmp1, n
+ incb tmp1
+ whilelo p5.b, tmp1, n
+ b.last 1f
+ ld1b z0.b, p0/z, [src, #0, mul vl]
+ ld1b z1.b, p1/z, [src, #1, mul vl]
+ ld1b z2.b, p2/z, [src, #2, mul vl]
+ ld1b z3.b, p3/z, [src, #3, mul vl]
+ ld1b z4.b, p4/z, [src, #4, mul vl]
+ ld1b z5.b, p5/z, [src, #5, mul vl]
+ st1b z0.b, p0, [dest, #0, mul vl]
+ st1b z1.b, p1, [dest, #1, mul vl]
+ st1b z2.b, p2, [dest, #2, mul vl]
+ st1b z3.b, p3, [dest, #3, mul vl]
+ st1b z4.b, p4, [dest, #4, mul vl]
+ st1b z5.b, p5, [dest, #5, mul vl]
+ ret
+1: lsl tmp1, vector_length, 2 // vector_length * 4
+ incb tmp1 // vector_length * 5
+ incb tmp1 // vector_length * 6
+ whilelo p6.b, tmp1, n
+ incb tmp1
+ whilelo p7.b, tmp1, n
+ ld1b z0.b, p0/z, [src, #0, mul vl]
+ ld1b z1.b, p1/z, [src, #1, mul vl]
+ ld1b z2.b, p2/z, [src, #2, mul vl]
+ ld1b z3.b, p3/z, [src, #3, mul vl]
+ ld1b z4.b, p4/z, [src, #4, mul vl]
+ ld1b z5.b, p5/z, [src, #5, mul vl]
+ ld1b z6.b, p6/z, [src, #6, mul vl]
+ ld1b z7.b, p7/z, [src, #7, mul vl]
+ st1b z0.b, p0, [dest, #0, mul vl]
+ st1b z1.b, p1, [dest, #1, mul vl]
+ st1b z2.b, p2, [dest, #2, mul vl]
+ st1b z3.b, p3, [dest, #3, mul vl]
+ st1b z4.b, p4, [dest, #4, mul vl]
+ st1b z5.b, p5, [dest, #5, mul vl]
+ st1b z6.b, p6, [dest, #6, mul vl]
+ st1b z7.b, p7, [dest, #7, mul vl]
+ ret
+ .endm
+
+ENTRY (MEMCPY)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
+
+L(memcpy):
+ cntb vector_length
+ // shortcut for less than vector_length * 8
+ // gives a free ptrue to p0.b for n >= vector_length
+ shortcut_for_small_size L(vl_agnostic)
+ // end of shortcut
+
+L(vl_agnostic): // VL Agnostic
+ mov rest, n
+ mov dest_ptr, dest
+ mov src_ptr, src
+ // if rest >= L2_SIZE && vector_length == 64 then L(L2)
+ mov tmp1, 64
+ cmp rest, L2_SIZE
+ ccmp vector_length, tmp1, 0, cs
+ b.eq L(L2)
+
+L(unroll8): // unrolling and software pipeline
+ lsl tmp1, vector_length, 3 // vector_length * 8
+ .p2align 3
+ cmp rest, tmp1
+ b.cc L(last)
+ ld1b_unroll8
+ add src_ptr, src_ptr, tmp1
+ sub rest, rest, tmp1
+ cmp rest, tmp1
+ b.cc 2f
+ .p2align 3
+1: stld1b_unroll8
+ add dest_ptr, dest_ptr, tmp1
+ add src_ptr, src_ptr, tmp1
+ sub rest, rest, tmp1
+ cmp rest, tmp1
+ b.ge 1b
+2: st1b_unroll8
+ add dest_ptr, dest_ptr, tmp1
+
+ .p2align 3
+L(last):
+ whilelo p0.b, xzr, rest
+ whilelo p1.b, vector_length, rest
+ b.last 1f
+ ld1b z0.b, p0/z, [src_ptr, #0, mul vl]
+ ld1b z1.b, p1/z, [src_ptr, #1, mul vl]
+ st1b z0.b, p0, [dest_ptr, #0, mul vl]
+ st1b z1.b, p1, [dest_ptr, #1, mul vl]
+ ret
+1: lsl tmp1, vector_length, 1 // vector_length * 2
+ whilelo p2.b, tmp1, rest
+ incb tmp1
+ whilelo p3.b, tmp1, rest
+ b.last 1f
+ ld1b z0.b, p0/z, [src_ptr, #0, mul vl]
+ ld1b z1.b, p1/z, [src_ptr, #1, mul vl]
+ ld1b z2.b, p2/z, [src_ptr, #2, mul vl]
+ ld1b z3.b, p3/z, [src_ptr, #3, mul vl]
+ st1b z0.b, p0, [dest_ptr, #0, mul vl]
+ st1b z1.b, p1, [dest_ptr, #1, mul vl]
+ st1b z2.b, p2, [dest_ptr, #2, mul vl]
+ st1b z3.b, p3, [dest_ptr, #3, mul vl]
+ ret
+1: lsl tmp1, vector_length, 2 // vector_length * 4
+ whilelo p4.b, tmp1, rest
+ incb tmp1
+ whilelo p5.b, tmp1, rest
+ incb tmp1
+ whilelo p6.b, tmp1, rest
+ incb tmp1
+ whilelo p7.b, tmp1, rest
+ ld1b z0.b, p0/z, [src_ptr, #0, mul vl]
+ ld1b z1.b, p1/z, [src_ptr, #1, mul vl]
+ ld1b z2.b, p2/z, [src_ptr, #2, mul vl]
+ ld1b z3.b, p3/z, [src_ptr, #3, mul vl]
+ ld1b z4.b, p4/z, [src_ptr, #4, mul vl]
+ ld1b z5.b, p5/z, [src_ptr, #5, mul vl]
+ ld1b z6.b, p6/z, [src_ptr, #6, mul vl]
+ ld1b z7.b, p7/z, [src_ptr, #7, mul vl]
+ st1b z0.b, p0, [dest_ptr, #0, mul vl]
+ st1b z1.b, p1, [dest_ptr, #1, mul vl]
+ st1b z2.b, p2, [dest_ptr, #2, mul vl]
+ st1b z3.b, p3, [dest_ptr, #3, mul vl]
+ st1b z4.b, p4, [dest_ptr, #4, mul vl]
+ st1b z5.b, p5, [dest_ptr, #5, mul vl]
+ st1b z6.b, p6, [dest_ptr, #6, mul vl]
+ st1b z7.b, p7, [dest_ptr, #7, mul vl]
+ ret
+
+L(L2):
+ // align dest address at CACHE_LINE_SIZE byte boundary
+ mov tmp1, CACHE_LINE_SIZE
+ ands tmp2, dest_ptr, CACHE_LINE_SIZE - 1
+ // if cl_remainder == 0
+ b.eq L(L2_dc_zva)
+ sub cl_remainder, tmp1, tmp2
+ // process remainder until the first CACHE_LINE_SIZE boundary
+ whilelo p1.b, xzr, cl_remainder // keep p0.b all true
+ whilelo p2.b, vector_length, cl_remainder
+ b.last 1f
+ ld1b z1.b, p1/z, [src_ptr, #0, mul vl]
+ ld1b z2.b, p2/z, [src_ptr, #1, mul vl]
+ st1b z1.b, p1, [dest_ptr, #0, mul vl]
+ st1b z2.b, p2, [dest_ptr, #1, mul vl]
+ b 2f
+1: lsl tmp1, vector_length, 1 // vector_length * 2
+ whilelo p3.b, tmp1, cl_remainder
+ incb tmp1
+ whilelo p4.b, tmp1, cl_remainder
+ ld1b z1.b, p1/z, [src_ptr, #0, mul vl]
+ ld1b z2.b, p2/z, [src_ptr, #1, mul vl]
+ ld1b z3.b, p3/z, [src_ptr, #2, mul vl]
+ ld1b z4.b, p4/z, [src_ptr, #3, mul vl]
+ st1b z1.b, p1, [dest_ptr, #0, mul vl]
+ st1b z2.b, p2, [dest_ptr, #1, mul vl]
+ st1b z3.b, p3, [dest_ptr, #2, mul vl]
+ st1b z4.b, p4, [dest_ptr, #3, mul vl]
+2: add dest_ptr, dest_ptr, cl_remainder
+ add src_ptr, src_ptr, cl_remainder
+ sub rest, rest, cl_remainder
+
+L(L2_dc_zva):
+ // zero fill
+ and tmp1, dest, 0xffffffffffffff
+ and tmp2, src, 0xffffffffffffff
+ subs tmp1, tmp1, tmp2 // diff
+ b.ge 1f
+ neg tmp1, tmp1
+1: mov tmp3, ZF_DIST + CACHE_LINE_SIZE * 2
+ cmp tmp1, tmp3
+ b.lo L(unroll8)
+ mov tmp1, dest_ptr
+ dc_zva (ZF_DIST / CACHE_LINE_SIZE) - 1
+ // unroll
+ ld1b_unroll8 // this line has to be after "b.lo L(unroll8)"
+ add src_ptr, src_ptr, CACHE_LINE_SIZE * 2
+ sub rest, rest, CACHE_LINE_SIZE * 2
+ mov tmp1, ZF_DIST
+ .p2align 3
+1: stld1b_unroll4a
+ add tmp2, dest_ptr, tmp1 // dest_ptr + ZF_DIST
+ dc zva, tmp2
+ stld1b_unroll4b
+ add tmp2, tmp2, CACHE_LINE_SIZE
+ dc zva, tmp2
+ add dest_ptr, dest_ptr, CACHE_LINE_SIZE * 2
+ add src_ptr, src_ptr, CACHE_LINE_SIZE * 2
+ sub rest, rest, CACHE_LINE_SIZE * 2
+ cmp rest, tmp3 // ZF_DIST + CACHE_LINE_SIZE * 2
+ b.ge 1b
+ st1b_unroll8
+ add dest_ptr, dest_ptr, CACHE_LINE_SIZE * 2
+ b L(unroll8)
+
+END (MEMCPY)
+libc_hidden_builtin_def (MEMCPY)
+
+
+ENTRY (MEMMOVE)
+
+ PTR_ARG (0)
+ PTR_ARG (1)
+ SIZE_ARG (2)
+
+ // remove tag address
+ // dest has to be immutable because it is the return value
+ // src has to be immutable because it is used in L(bwd_last)
+ and tmp2, dest, 0xffffffffffffff // save dest_notag into tmp2
+ and tmp3, src, 0xffffffffffffff // save src_notag into tmp3
+ cmp n, 0
+ ccmp tmp2, tmp3, 4, ne
+ b.ne 1f
+ ret
+1: cntb vector_length
+ // shortcut for less than vector_length * 8
+ // gives a free ptrue to p0.b for n >= vector_length
+ // tmp2 and tmp3 should not be used in this macro to keep
+ // notag addresses
+ shortcut_for_small_size L(dispatch)
+ // end of shortcut
+
+L(dispatch):
+ // tmp2 = dest_notag, tmp3 = src_notag
+ // diff = dest_notag - src_notag
+ sub tmp1, tmp2, tmp3
+ // if diff <= 0 || diff >= n then memcpy
+ cmp tmp1, 0
+ ccmp tmp1, n, 2, gt
+ b.cs L(vl_agnostic)
+
+L(bwd_start):
+ mov rest, n
+ add dest_ptr, dest, n // dest_end
+ add src_ptr, src, n // src_end
+
+L(bwd_unroll8): // unrolling and software pipeline
+ lsl tmp1, vector_length, 3 // vector_length * 8
+ .p2align 3
+ cmp rest, tmp1
+ b.cc L(bwd_last)
+ sub src_ptr, src_ptr, tmp1
+ ld1b_unroll8
+ sub rest, rest, tmp1
+ cmp rest, tmp1
+ b.cc 2f
+ .p2align 3
+1: sub src_ptr, src_ptr, tmp1
+ sub dest_ptr, dest_ptr, tmp1
+ stld1b_unroll8
+ sub rest, rest, tmp1
+ cmp rest, tmp1
+ b.ge 1b
+2: sub dest_ptr, dest_ptr, tmp1
+ st1b_unroll8
+
+L(bwd_last):
+ mov dest_ptr, dest
+ mov src_ptr, src
+ b L(last)
+
+END (MEMMOVE)
+libc_hidden_builtin_def (MEMMOVE)
+# endif /* IS_IN (libc) */
+#endif /* HAVE_AARCH64_SVE_ASM */
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index e69d8162910b938e..d96612b9cf7c3a4e 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -31,14 +31,22 @@ extern __typeof (__redirect_memmove) __libc_memmove;
extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
+# if HAVE_AARCH64_SVE_ASM
+extern __typeof (__redirect_memmove) __memmove_a64fx attribute_hidden;
+# endif
libc_ifunc (__libc_memmove,
(IS_THUNDERX (midr)
? __memmove_thunderx
: (IS_FALKOR (midr) || IS_PHECDA (midr)
? __memmove_falkor
+# if HAVE_AARCH64_SVE_ASM
+ : (IS_A64FX (midr)
+ ? __memmove_a64fx
+ : __memmove_generic))));
+# else
: __memmove_generic)));
-
+# endif
# undef memmove
strong_alias (__libc_memmove, memmove);
#endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index b4f348509eb1c6b3..71e4355c972f1ffb 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -36,6 +36,7 @@ static struct cpu_list cpu_list[] = {
{"thunderx2t99", 0x431F0AF0},
{"thunderx2t99p1", 0x420F5160},
{"phecda", 0x680F0000},
+ {"a64fx", 0x460F0010},
{"generic", 0x0}
};
@@ -80,4 +81,7 @@ init_cpu_features (struct cpu_features *cpu_features)
if ((dczid & DCZID_DZP_MASK) == 0)
cpu_features->zva_size = 4 << (dczid & DCZID_BS_MASK);
+
+ /* Check if SVE is supported. */
+ cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE;
}
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index eb35adfbe9d429d5..5691aea6de3cb7f4 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -20,6 +20,7 @@
#define _CPU_FEATURES_AARCH64_H
#include <stdint.h>
+#include <stdbool.h>
#define MIDR_PARTNUM_SHIFT 4
#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT)
@@ -52,10 +53,14 @@
#define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \
&& MIDR_PARTNUM(midr) == 0x000)
+#define IS_A64FX(midr) (MIDR_IMPLEMENTOR(midr) == 'F' \
+ && MIDR_PARTNUM(midr) == 0x001)
+
struct cpu_features
{
uint64_t midr_el1;
unsigned zva_size;
+ bool sve;
};
#endif /* _CPU_FEATURES_AARCH64_H */


@@ -0,0 +1,371 @@
commit 4f26956d5ba394eb3ade6c1c20b5c16864a00766
Author: Naohiro Tamura <naohirot@jp.fujitsu.com>
Date: Thu May 27 07:44:12 2021 +0000
aarch64: Added optimized memset for A64FX
This patch optimizes the performance of memset for A64FX [1] which
implements ARMv8-A SVE and has L1 64KB cache per core and L2 8MB cache
per NUMA node.
The performance optimization makes use of Scalable Vector Register
with several techniques such as loop unrolling, memory access
alignment, cache zero fill and prefetch.
SVE assembler code for memset is implemented as Vector Length Agnostic
code so theoretically it can be run on any SOC which supports ARMv8-A
SVE standard.
We confirmed that all testcases have been passed by running 'make
check' and 'make xcheck' not only on A64FX but also on ThunderX2.
And also we confirmed that the SVE 512 bit vector register performance
is roughly 4 times better than Advanced SIMD 128 bit register and 8
times better than scalar 64 bit register by running 'make bench'.
[1] https://github.com/fujitsu/A64FX
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Reviewed-by: Szabolcs Nagy <Szabolcs.Nagy@arm.com>
Conflicts:
sysdeps/aarch64/multiarch/Makefile
sysdeps/aarch64/multiarch/ifunc-impl-list.c
sysdeps/aarch64/multiarch/memset.c
(all conflicts due to missing other CPU implementations downstream)
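As in the memcpy patch, the vector-length-agnostic core is compact when sketched with ACLE intrinsics (illustrative only; the real code below adds the unrolling, prefetch, and DC ZVA zero fill that give the speedup):

#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

void
vla_set (uint8_t *dst, int c, size_t n)
{
  svuint8_t v = svdup_n_u8 ((uint8_t) c);   /* splat the fill byte */
  for (size_t i = 0; i < n; i += svcntb ())
    {
      svbool_t pg = svwhilelt_b8_u64 (i, n);
      svst1_u8 (pg, dst + i, v);
    }
}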
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index 5a19ba0308e80983..5ff883a8ad8e3067 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -1,5 +1,6 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
memcpy_falkor memcpy_a64fx \
- memmove_falkor memset_generic memset_falkor
+ memmove_falkor memset_generic memset_falkor \
+ memset_a64fx
endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index f53db12acce37877..53e3e162a1025e40 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -37,7 +37,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
INIT_ARCH ();
- /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */
+ /* Support sysdeps/aarch64/multiarch/memcpy.c, memmove.c and memset.c. */
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2)
@@ -57,6 +57,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Enable this on non-falkor processors too so that other cores
can do a comparative analysis with __memset_generic. */
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
+#if HAVE_AARCH64_SVE_ASM
+ IFUNC_IMPL_ADD (array, i, memset, sve, __memset_a64fx)
+#endif
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
return i;
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
index d74ed3a549a54b10..2c8cc72bb0b18474 100644
--- a/sysdeps/aarch64/multiarch/memset.c
+++ b/sysdeps/aarch64/multiarch/memset.c
@@ -29,12 +29,21 @@
extern __typeof (__redirect_memset) __libc_memset;
extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
+# if HAVE_AARCH64_SVE_ASM
+extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
+# endif
extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
libc_ifunc (__libc_memset,
((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64
? __memset_falkor
+# if HAVE_AARCH64_SVE_ASM
+ : (IS_A64FX (midr)
+ ? __memset_a64fx
+ : __memset_generic)));
+# else
: __memset_generic));
+# endif
# undef memset
strong_alias (__libc_memset, memset);
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
new file mode 100644
index 0000000000000000..ce54e5418b08c8bc
--- /dev/null
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -0,0 +1,268 @@
+/* Optimized memset for Fujitsu A64FX processor.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sysdeps/aarch64/memset-reg.h>
+
+/* Assumptions:
+ *
+ * ARMv8.2-a, AArch64, unaligned accesses, sve
+ *
+ */
+
+#define L1_SIZE (64*1024) // L1 64KB
+#define L2_SIZE (8*1024*1024) // L2 8MB - 1MB
+#define CACHE_LINE_SIZE 256
+#define PF_DIST_L1 (CACHE_LINE_SIZE * 16) // Prefetch distance L1
+#define ZF_DIST (CACHE_LINE_SIZE * 21) // Zerofill distance
+#define rest x8
+#define vector_length x9
+#define vl_remainder x10 // vector_length remainder
+#define cl_remainder x11 // CACHE_LINE_SIZE remainder
+
+#if HAVE_AARCH64_SVE_ASM
+# if IS_IN (libc)
+# define MEMSET __memset_a64fx
+
+ .arch armv8.2-a+sve
+
+ .macro dc_zva times
+ dc zva, tmp1
+ add tmp1, tmp1, CACHE_LINE_SIZE
+ .if \times-1
+ dc_zva "(\times-1)"
+ .endif
+ .endm
+
+ .macro st1b_unroll first=0, last=7
+ st1b z0.b, p0, [dst, #\first, mul vl]
+ .if \last-\first
+ st1b_unroll "(\first+1)", \last
+ .endif
+ .endm
+
+ .macro shortcut_for_small_size exit
+ // if rest <= vector_length * 2
+ whilelo p0.b, xzr, count
+ whilelo p1.b, vector_length, count
+ b.last 1f
+ st1b z0.b, p0, [dstin, #0, mul vl]
+ st1b z0.b, p1, [dstin, #1, mul vl]
+ ret
+1: // if rest > vector_length * 8
+ cmp count, vector_length, lsl 3 // vector_length * 8
+ b.hi \exit
+ // if rest <= vector_length * 4
+ lsl tmp1, vector_length, 1 // vector_length * 2
+ whilelo p2.b, tmp1, count
+ incb tmp1
+ whilelo p3.b, tmp1, count
+ b.last 1f
+ st1b z0.b, p0, [dstin, #0, mul vl]
+ st1b z0.b, p1, [dstin, #1, mul vl]
+ st1b z0.b, p2, [dstin, #2, mul vl]
+ st1b z0.b, p3, [dstin, #3, mul vl]
+ ret
+1: // if rest <= vector_length * 8
+ lsl tmp1, vector_length, 2 // vector_length * 4
+ whilelo p4.b, tmp1, count
+ incb tmp1
+ whilelo p5.b, tmp1, count
+ b.last 1f
+ st1b z0.b, p0, [dstin, #0, mul vl]
+ st1b z0.b, p1, [dstin, #1, mul vl]
+ st1b z0.b, p2, [dstin, #2, mul vl]
+ st1b z0.b, p3, [dstin, #3, mul vl]
+ st1b z0.b, p4, [dstin, #4, mul vl]
+ st1b z0.b, p5, [dstin, #5, mul vl]
+ ret
+1: lsl tmp1, vector_length, 2 // vector_length * 4
+ incb tmp1 // vector_length * 5
+ incb tmp1 // vector_length * 6
+ whilelo p6.b, tmp1, count
+ incb tmp1
+ whilelo p7.b, tmp1, count
+ st1b z0.b, p0, [dstin, #0, mul vl]
+ st1b z0.b, p1, [dstin, #1, mul vl]
+ st1b z0.b, p2, [dstin, #2, mul vl]
+ st1b z0.b, p3, [dstin, #3, mul vl]
+ st1b z0.b, p4, [dstin, #4, mul vl]
+ st1b z0.b, p5, [dstin, #5, mul vl]
+ st1b z0.b, p6, [dstin, #6, mul vl]
+ st1b z0.b, p7, [dstin, #7, mul vl]
+ ret
+ .endm
+
+ENTRY (MEMSET)
+
+ PTR_ARG (0)
+ SIZE_ARG (2)
+
+ cbnz count, 1f
+ ret
+1: dup z0.b, valw
+ cntb vector_length
+ // shortcut for less than vector_length * 8
+ // gives a free ptrue to p0.b for n >= vector_length
+ shortcut_for_small_size L(vl_agnostic)
+ // end of shortcut
+
+L(vl_agnostic): // VL Agnostic
+ mov rest, count
+ mov dst, dstin
+ add dstend, dstin, count
+ // if rest >= L2_SIZE && vector_length == 64 then L(L2)
+ mov tmp1, 64
+ cmp rest, L2_SIZE
+ ccmp vector_length, tmp1, 0, cs
+ b.eq L(L2)
+ // if rest >= L1_SIZE && vector_length == 64 then L(L1_prefetch)
+ cmp rest, L1_SIZE
+ ccmp vector_length, tmp1, 0, cs
+ b.eq L(L1_prefetch)
+
+L(unroll32):
+ lsl tmp1, vector_length, 3 // vector_length * 8
+ lsl tmp2, vector_length, 5 // vector_length * 32
+ .p2align 3
+1: cmp rest, tmp2
+ b.cc L(unroll8)
+ st1b_unroll
+ add dst, dst, tmp1
+ st1b_unroll
+ add dst, dst, tmp1
+ st1b_unroll
+ add dst, dst, tmp1
+ st1b_unroll
+ add dst, dst, tmp1
+ sub rest, rest, tmp2
+ b 1b
+
+L(unroll8):
+ lsl tmp1, vector_length, 3
+ .p2align 3
+1: cmp rest, tmp1
+ b.cc L(last)
+ st1b_unroll
+ add dst, dst, tmp1
+ sub rest, rest, tmp1
+ b 1b
+
+L(last):
+ whilelo p0.b, xzr, rest
+ whilelo p1.b, vector_length, rest
+ b.last 1f
+ st1b z0.b, p0, [dst, #0, mul vl]
+ st1b z0.b, p1, [dst, #1, mul vl]
+ ret
+1: lsl tmp1, vector_length, 1 // vector_length * 2
+ whilelo p2.b, tmp1, rest
+ incb tmp1
+ whilelo p3.b, tmp1, rest
+ b.last 1f
+ st1b z0.b, p0, [dst, #0, mul vl]
+ st1b z0.b, p1, [dst, #1, mul vl]
+ st1b z0.b, p2, [dst, #2, mul vl]
+ st1b z0.b, p3, [dst, #3, mul vl]
+ ret
+1: lsl tmp1, vector_length, 2 // vector_length * 4
+ whilelo p4.b, tmp1, rest
+ incb tmp1
+ whilelo p5.b, tmp1, rest
+ incb tmp1
+ whilelo p6.b, tmp1, rest
+ incb tmp1
+ whilelo p7.b, tmp1, rest
+ st1b z0.b, p0, [dst, #0, mul vl]
+ st1b z0.b, p1, [dst, #1, mul vl]
+ st1b z0.b, p2, [dst, #2, mul vl]
+ st1b z0.b, p3, [dst, #3, mul vl]
+ st1b z0.b, p4, [dst, #4, mul vl]
+ st1b z0.b, p5, [dst, #5, mul vl]
+ st1b z0.b, p6, [dst, #6, mul vl]
+ st1b z0.b, p7, [dst, #7, mul vl]
+ ret
+
+L(L1_prefetch): // if rest >= L1_SIZE
+ .p2align 3
+1: st1b_unroll 0, 3
+ prfm pstl1keep, [dst, PF_DIST_L1]
+ st1b_unroll 4, 7
+ prfm pstl1keep, [dst, PF_DIST_L1 + CACHE_LINE_SIZE]
+ add dst, dst, CACHE_LINE_SIZE * 2
+ sub rest, rest, CACHE_LINE_SIZE * 2
+ cmp rest, L1_SIZE
+ b.ge 1b
+ cbnz rest, L(unroll32)
+ ret
+
+L(L2):
+ // align dst address at vector_length byte boundary
+ sub tmp1, vector_length, 1
+ ands tmp2, dst, tmp1
+ // if vl_remainder == 0
+ b.eq 1f
+ sub vl_remainder, vector_length, tmp2
+ // process remainder until the first vector_length boundary
+ whilelt p2.b, xzr, vl_remainder
+ st1b z0.b, p2, [dst]
+ add dst, dst, vl_remainder
+ sub rest, rest, vl_remainder
+ // align dstin address at CACHE_LINE_SIZE byte boundary
+1: mov tmp1, CACHE_LINE_SIZE
+ ands tmp2, dst, CACHE_LINE_SIZE - 1
+ // if cl_remainder == 0
+ b.eq L(L2_dc_zva)
+ sub cl_remainder, tmp1, tmp2
+ // process remainder until the first CACHE_LINE_SIZE boundary
+ mov tmp1, xzr // index
+2: whilelt p2.b, tmp1, cl_remainder
+ st1b z0.b, p2, [dst, tmp1]
+ incb tmp1
+ cmp tmp1, cl_remainder
+ b.lo 2b
+ add dst, dst, cl_remainder
+ sub rest, rest, cl_remainder
+
+L(L2_dc_zva):
+ // zero fill
+ mov tmp1, dst
+ dc_zva (ZF_DIST / CACHE_LINE_SIZE) - 1
+ mov zva_len, ZF_DIST
+ add tmp1, zva_len, CACHE_LINE_SIZE * 2
+ // unroll
+ .p2align 3
+1: st1b_unroll 0, 3
+ add tmp2, dst, zva_len
+ dc zva, tmp2
+ st1b_unroll 4, 7
+ add tmp2, tmp2, CACHE_LINE_SIZE
+ dc zva, tmp2
+ add dst, dst, CACHE_LINE_SIZE * 2
+ sub rest, rest, CACHE_LINE_SIZE * 2
+ cmp rest, tmp1 // ZF_DIST + CACHE_LINE_SIZE * 2
+ b.ge 1b
+ cbnz rest, L(unroll8)
+ ret
+
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)
+
+#endif /* IS_IN (libc) */
+#endif /* HAVE_AARCH64_SVE_ASM */


@@ -0,0 +1,50 @@
From 756c306502498f999fdd494477b9cea1b45e4faf Mon Sep 17 00:00:00 2001
From: Stefan Liebler <stli@linux.ibm.com>
Date: Fri, 21 Aug 2020 11:23:17 +0200
Subject: [PATCH] S390: Sync HWCAP names with kernel by adding aliases [BZ
#25971]
Unfortunately some HWCAP names like HWCAP_S390_VX differ between the
kernel (see <kernel>/arch/s390/include/asm/elf.h) and glibc.
Therefore, those HWCAP names from the kernel are now introduced as aliases.
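With the aliases in place, code written against the kernel's names compiles unchanged against glibc's headers. A small sketch (assumes an s390x Linux system, where <sys/auxv.h> exposes the HWCAP macros):

#include <stdio.h>
#include <sys/auxv.h>

int
main (void)
{
  unsigned long hw = getauxval (AT_HWCAP);
  /* HWCAP_S390_VXRS is the kernel's spelling; after this patch it
     aliases glibc's HWCAP_S390_VX, so both test the same bit.  */
  printf ("vector facility: %s\n", (hw & HWCAP_S390_VXRS) ? "yes" : "no");
  return 0;
}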
---
sysdeps/s390/dl-procinfo.h | 3 +++
sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 3 +++
2 files changed, 6 insertions(+)
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index 0db4bc39c7..08eee109f7 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -51,8 +51,11 @@ enum
HWCAP_S390_HIGH_GPRS = 1 << 9,
HWCAP_S390_TE = 1 << 10,
HWCAP_S390_VX = 1 << 11,
+ HWCAP_S390_VXRS = HWCAP_S390_VX,
HWCAP_S390_VXD = 1 << 12,
+ HWCAP_S390_VXRS_BCD = HWCAP_S390_VXD,
HWCAP_S390_VXE = 1 << 13,
+ HWCAP_S390_VXRS_EXT = HWCAP_S390_VXE,
HWCAP_S390_GS = 1 << 14,
HWCAP_S390_VXRS_EXT2 = 1 << 15,
HWCAP_S390_VXRS_PDE = 1 << 16,
diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
index 6adbec018b..f2998ff131 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
@@ -36,8 +36,11 @@
#define HWCAP_S390_HIGH_GPRS 512
#define HWCAP_S390_TE 1024
#define HWCAP_S390_VX 2048
+#define HWCAP_S390_VXRS HWCAP_S390_VX
#define HWCAP_S390_VXD 4096
+#define HWCAP_S390_VXRS_BCD HWCAP_S390_VXD
#define HWCAP_S390_VXE 8192
+#define HWCAP_S390_VXRS_EXT HWCAP_S390_VXE
#define HWCAP_S390_GS 16384
#define HWCAP_S390_VXRS_EXT2 32768
#define HWCAP_S390_VXRS_PDE 65536
--
2.31.1


@@ -0,0 +1,67 @@
From 25251c0707fe34f30a27381a5fabc35435a96621 Mon Sep 17 00:00:00 2001
From: Stefan Liebler <stli@linux.ibm.com>
Date: Tue, 16 Feb 2021 16:18:56 +0100
Subject: [PATCH] S390: Add new hwcap values.
The new hwcap values indicate support for arch14 architecture.
---
sysdeps/s390/dl-procinfo.c | 5 +++--
sysdeps/s390/dl-procinfo.h | 4 +++-
sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 2 ++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index 0c334a2551..c174e27b35 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -46,12 +46,13 @@
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_cap_flags
#else
-PROCINFO_CLASS const char _dl_s390_cap_flags[19][9]
+PROCINFO_CLASS const char _dl_s390_cap_flags[21][9]
#endif
#ifndef PROCINFO_DECL
= {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
- "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt"
+ "highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt",
+ "vxp2", "nnpa"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index 9e1a8c7ba9..2d9c305808 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -21,7 +21,7 @@
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
-#define _DL_HWCAP_COUNT 19
+#define _DL_HWCAP_COUNT 21
#define _DL_PLATFORMS_COUNT 10
@@ -61,6 +61,8 @@ enum
HWCAP_S390_VXRS_PDE = 1 << 16,
HWCAP_S390_SORT = 1 << 17,
HWCAP_S390_DFLT = 1 << 18,
+ HWCAP_S390_VXRS_PDE2 = 1 << 19,
+ HWCAP_S390_NNPA = 1 << 20,
};
#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
index 696616e779..e9bd3684db 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
@@ -46,3 +46,5 @@
#define HWCAP_S390_VXRS_PDE 65536
#define HWCAP_S390_SORT 131072
#define HWCAP_S390_DFLT 262144
+#define HWCAP_S390_VXRS_PDE2 524288
+#define HWCAP_S390_NNPA 1048576
--
2.31.1


@@ -0,0 +1,88 @@
From f2e06656d04a9fcb0603802a4f8ce7aa3a1f055e Mon Sep 17 00:00:00 2001
From: Stefan Liebler <stli@linux.ibm.com>
Date: Tue, 5 Oct 2021 16:14:10 +0200
Subject: [PATCH] S390: Add PCI_MIO and SIE HWCAPs
Both new HWCAPs were introduced in these kernel commits:
- 7e8403ecaf884f307b627f3c371475913dd29292
"s390: add HWCAP_S390_PCI_MIO to ELF hwcaps"
- 7e82523f2583e9813e4109df3656707162541297
"s390/hwcaps: make sie capability regular hwcap"
Also note that the kernel commit 511ad531afd4090625def4d9aba1f5227bd44b8e
"s390/hwcaps: shorten HWCAP defines" has shortened the prefix of the macros
from "HWCAP_S390_" to "HWCAP_". For compatibility reasons, we do not
change the prefix in public glibc header file.
---
sysdeps/s390/dl-procinfo.c | 4 ++--
sysdeps/s390/dl-procinfo.h | 4 +++-
sysdeps/unix/sysv/linux/s390/bits/hwcap.h | 7 +++++++
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c
index 7314c31b15..97be34fe9d 100644
--- a/sysdeps/s390/dl-procinfo.c
+++ b/sysdeps/s390/dl-procinfo.c
@@ -45,13 +45,13 @@
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_cap_flags
#else
-PROCINFO_CLASS const char _dl_s390_cap_flags[21][9]
+PROCINFO_CLASS const char _dl_s390_cap_flags[23][9]
#endif
#ifndef PROCINFO_DECL
= {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
"highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt",
- "vxp2", "nnpa"
+ "vxp2", "nnpa", "pcimio", "sie"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h
index 2502dd2604..d9a3b264ff 100644
--- a/sysdeps/s390/dl-procinfo.h
+++ b/sysdeps/s390/dl-procinfo.h
@@ -20,7 +20,7 @@
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
-#define _DL_HWCAP_COUNT 21
+#define _DL_HWCAP_COUNT 23
#define _DL_PLATFORMS_COUNT 10
@@ -62,6 +62,8 @@ enum
HWCAP_S390_DFLT = 1 << 18,
HWCAP_S390_VXRS_PDE2 = 1 << 19,
HWCAP_S390_NNPA = 1 << 20,
+ HWCAP_S390_PCI_MIO = 1 << 21,
+ HWCAP_S390_SIE = 1 << 22,
};
#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
diff --git a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
index e9bd3684db..00e73a3e3b 100644
--- a/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
+++ b/sysdeps/unix/sysv/linux/s390/bits/hwcap.h
@@ -22,6 +22,11 @@
/*
* The following must match the kernels asm/elf.h.
+ * Note: The kernel commit 511ad531afd4090625def4d9aba1f5227bd44b8e
+ * "s390/hwcaps: shorten HWCAP defines" has shortened the prefix of the macros
+ * from "HWCAP_S390_" to "HWCAP_". For compatibility reasons, we do not
+ * change the prefix in public glibc header file.
+ *
* Note that these are *not* the same as the STORE FACILITY LIST bits.
*/
#define HWCAP_S390_ESAN3 1
@@ -48,3 +53,5 @@
#define HWCAP_S390_DFLT 262144
#define HWCAP_S390_VXRS_PDE2 524288
#define HWCAP_S390_NNPA 1048576
+#define HWCAP_S390_PCI_MIO 2097152
+#define HWCAP_S390_SIE 4194304
--
2.31.1


@@ -0,0 +1,30 @@
commit ad78d702757a189b1fa552d607e8aaa22252a45f
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue May 12 19:06:18 2020 +0200
elf: Remove redundant add_to_global_resize_failure call from dl_open_args
The second call does not do anything because the data structures have
already been resized by the call that comes before the demarcation
point. Fixes commit a509eb117fac1d764b15eba64993f4bdb63d7f3c
("Avoid late dlopen failure due to scope, TLS slotinfo updates
[BZ #25112]").
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 3d49a84596e99bf6..b052bb0bc2cd17aa 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -769,11 +769,6 @@ dl_open_worker (void *a)
DL_STATIC_INIT (new);
#endif
- /* Perform the necessary allocations for adding new global objects
- to the global scope below, via add_to_global_update. */
- if (mode & RTLD_GLOBAL)
- add_to_global_resize (new);
-
/* Run the initializer functions of new objects. Temporarily
disable the exception handler, so that lazy binding failures are
fatal. */


@@ -0,0 +1,23 @@
commit 52290d8c04569615fb011ee286d52dc5147afbd7
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Apr 15 09:57:10 2021 +0100
elf: Fix missing include in test case [BZ #27136]
Broken test was introduced in
commit 8f85075a2e9c26ff7486d4bbaf358999807d215c
elf: Add a DTV setup test [BZ #27136]
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
index ac5f8c8d39b66dd6..9977ec803208b9c8 100644
--- a/elf/tst-tls20.c
+++ b/elf/tst-tls20.c
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <support/check.h>
+#include <support/support.h>
#include <support/xdlfcn.h>
#include <support/xthread.h>


@@ -0,0 +1,160 @@
commit 2208066603a136f95cfb815ca9281262e6465784
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Feb 11 13:24:47 2021 +0000
elf: Remove lazy tlsdesc relocation related code
Remove generic tlsdesc code related to lazy tlsdesc processing since
lazy tlsdesc relocation is no longer supported. This includes removing
GL(dl_load_lock) from _dl_make_tlsdesc_dynamic which is only called at
load time when that lock is already held.
Added a documentation comment too.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/tlsdeschtab.h b/elf/tlsdeschtab.h
index fea9eefe72edcd6b..c20857e5b4264f00 100644
--- a/elf/tlsdeschtab.h
+++ b/elf/tlsdeschtab.h
@@ -78,6 +78,10 @@ map_generation (struct link_map *map)
return GL(dl_tls_generation) + 1;
}
+/* Returns the data pointer for a given map and tls offset that is used
+ to fill in one of the GOT entries referenced by a TLSDESC relocation
+ when using dynamic TLS. This requires allocation, returns NULL on
+ allocation failure. */
void *
_dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
{
@@ -85,18 +89,12 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
void **entry;
struct tlsdesc_dynamic_arg *td, test;
- /* FIXME: We could use a per-map lock here, but is it worth it? */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
-
ht = map->l_mach.tlsdesc_table;
if (! ht)
{
ht = htab_create ();
if (! ht)
- {
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- return 0;
- }
+ return 0;
map->l_mach.tlsdesc_table = ht;
}
@@ -104,15 +102,11 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
test.tlsinfo.ti_offset = ti_offset;
entry = htab_find_slot (ht, &test, 1, hash_tlsdesc, eq_tlsdesc);
if (! entry)
- {
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- return 0;
- }
+ return 0;
if (*entry)
{
td = *entry;
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
return td;
}
@@ -122,44 +116,9 @@ _dl_make_tlsdesc_dynamic (struct link_map *map, size_t ti_offset)
thread. */
td->gen_count = map_generation (map);
td->tlsinfo = test.tlsinfo;
-
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
return td;
}
# endif /* SHARED */
-/* The idea of the following two functions is to stop multiple threads
- from attempting to resolve the same TLS descriptor without busy
- waiting. Ideally, we should be able to release the lock right
- after changing td->entry, and then using say a condition variable
- or a futex wake to wake up any waiting threads, but let's try to
- avoid introducing such dependencies. */
-
-static int
-__attribute__ ((unused))
-_dl_tlsdesc_resolve_early_return_p (struct tlsdesc volatile *td, void *caller)
-{
- if (caller != atomic_load_relaxed (&td->entry))
- return 1;
-
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- if (caller != atomic_load_relaxed (&td->entry))
- {
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- return 1;
- }
-
- atomic_store_relaxed (&td->entry, _dl_tlsdesc_resolve_hold);
-
- return 0;
-}
-
-static void
-__attribute__ ((unused))
-_dl_tlsdesc_wake_up_held_fixups (void)
-{
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
#endif
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
index 357465f23d76e2bd..1ead73ab8250e29c 100644
--- a/sysdeps/aarch64/tlsdesc.c
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -22,7 +22,6 @@
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
-#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
/* Unmap the dynamic object, but also release its TLS descriptor table
diff --git a/sysdeps/arm/tlsdesc.c b/sysdeps/arm/tlsdesc.c
index d142d7a2c91e9adb..b78e3f65785bf587 100644
--- a/sysdeps/arm/tlsdesc.c
+++ b/sysdeps/arm/tlsdesc.c
@@ -20,7 +20,6 @@
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
-#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
/* Unmap the dynamic object, but also release its TLS descriptor table
diff --git a/sysdeps/i386/tlsdesc.c b/sysdeps/i386/tlsdesc.c
index 1b4227c8381e1b3d..c242ffce726d50e4 100644
--- a/sysdeps/i386/tlsdesc.c
+++ b/sysdeps/i386/tlsdesc.c
@@ -20,7 +20,6 @@
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
-#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
/* Unmap the dynamic object, but also release its TLS descriptor table
diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c
index 61a19ae26944c84f..a9325827d0e5e31b 100644
--- a/sysdeps/x86_64/tlsdesc.c
+++ b/sysdeps/x86_64/tlsdesc.c
@@ -20,7 +20,6 @@
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
-#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
/* Unmap the dynamic object, but also release its TLS descriptor table


@@ -0,0 +1,182 @@
commit 1387ad6225c2222f027790e3f460e31aa5dd2c54
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Dec 30 19:19:37 2020 +0000
elf: Fix data races in pthread_create and TLS access [BZ #19329]
DTV setup at thread creation (_dl_allocate_tls_init) is changed
to take the dlopen lock, GL(dl_load_lock). Avoiding data races
here without locks would require design changes: the map that is
accessed for static TLS initialization here may be concurrently
freed by dlclose. That use after free may be solved by only
locking around static TLS setup or by ensuring dlclose does not
free modules with static TLS, however currently every link map
with TLS has to be accessed at least to see if it needs static
TLS. And even if that's solved, still a lot of atomics would be
needed to synchronize DTV related globals without a lock. So fix
both bug 19329 and bug 27111 with a lock that prevents DTV setup
running concurrently with dlopen or dlclose.
_dl_update_slotinfo at TLS access still does not use any locks
so CONCURRENCY NOTES are added to explain the synchronization.
The early exit from the slotinfo walk when max_modid is reached
is not strictly necessary, but does not hurt either.
An incorrect acquire load was removed from _dl_resize_dtv: it
did not synchronize with any release store or fence and
synchronization is now handled separately at thread creation
and TLS access time.
There are still a number of racy read accesses to globals that
will be changed to relaxed MO atomics in a followup patch. This
should not introduce regressions compared to existing behaviour
and avoid cluttering the main part of the fix.
Not all TLS access related data races got fixed here: there are
additional races at lazy tlsdesc relocations see bug 27137.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
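The slotinfo-list part of the fix is a classic publish-with-release / read-with-acquire pattern, visible in the _dl_add_to_slotinfo and _dl_update_slotinfo hunks below. A stand-alone sketch of the idea (names are illustrative, not glibc internals):

#include <stdatomic.h>
#include <stdlib.h>

struct node
{
  int payload;                  /* stands in for the slotinfo array */
  struct node *_Atomic next;
};

/* Writer, runs under the dlopen lock: initialize the node fully, then
   publish it with a release store so any reader that observes the
   pointer also observes the initialized contents.  */
void
append (struct node *tail, int value)
{
  struct node *n = calloc (1, sizeof *n);
  if (n == NULL)
    abort ();
  n->payload = value;
  atomic_store_explicit (&tail->next, n, memory_order_release);
}

/* Reader, lock-free (the TLS access path): the acquire load pairs
   with the release store above and orders reads of the contents.  */
struct node *
next_node (struct node *head)
{
  return atomic_load_explicit (&head->next, memory_order_acquire);
}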
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 15ed01d795a8627a..da83cd6ae2ee6504 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -471,14 +471,11 @@ extern dtv_t _dl_static_dtv[];
#endif
static dtv_t *
-_dl_resize_dtv (dtv_t *dtv)
+_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
{
/* Resize the dtv. */
dtv_t *newp;
- /* Load GL(dl_tls_max_dtv_idx) atomically since it may be written to by
- other threads concurrently. */
- size_t newsize
- = atomic_load_acquire (&GL(dl_tls_max_dtv_idx)) + DTV_SURPLUS;
+ size_t newsize = max_modid + DTV_SURPLUS;
size_t oldsize = dtv[-1].counter;
if (dtv == GL(dl_initial_dtv))
@@ -524,11 +521,14 @@ _dl_allocate_tls_init (void *result)
size_t total = 0;
size_t maxgen = 0;
+ /* Protects global dynamic TLS related state. */
+ __rtld_lock_lock_recursive (GL(dl_load_lock));
+
/* Check if the current dtv is big enough. */
if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
{
/* Resize the dtv. */
- dtv = _dl_resize_dtv (dtv);
+ dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));
/* Install this new dtv in the thread data structures. */
INSTALL_DTV (result, &dtv[-1]);
@@ -596,6 +596,7 @@ _dl_allocate_tls_init (void *result)
listp = listp->next;
assert (listp != NULL);
}
+ __rtld_lock_unlock_recursive (GL(dl_load_lock));
/* The DTV version is up-to-date now. */
dtv[0].counter = maxgen;
@@ -730,12 +731,29 @@ _dl_update_slotinfo (unsigned long int req_modid)
if (dtv[0].counter < listp->slotinfo[idx].gen)
{
- /* The generation counter for the slot is higher than what the
- current dtv implements. We have to update the whole dtv but
- only those entries with a generation counter <= the one for
- the entry we need. */
+ /* CONCURRENCY NOTES:
+
+ Here the dtv needs to be updated to new_gen generation count.
+
+ This code may be called during TLS access when GL(dl_load_lock)
+ is not held. In that case the user code has to synchronize with
+ dlopen and dlclose calls of relevant modules. A module m is
+ relevant if the generation of m <= new_gen and dlclose of m is
+ synchronized: a memory access here happens after the dlopen and
+ before the dlclose of relevant modules. The dtv entries for
+ relevant modules need to be updated, other entries can be
+ arbitrary.
+
+ This e.g. means that the first part of the slotinfo list can be
+ accessed race free, but the tail may be concurrently extended.
+ Similarly relevant slotinfo entries can be read race free, but
+ other entries are racy. However updating a non-relevant dtv
+ entry does not affect correctness. For a relevant module m,
+ max_modid >= modid of m. */
size_t new_gen = listp->slotinfo[idx].gen;
size_t total = 0;
+ size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
+ assert (max_modid >= req_modid);
/* We have to look through the entire dtv slotinfo list. */
listp = GL(dl_tls_dtv_slotinfo_list);
@@ -745,12 +763,14 @@ _dl_update_slotinfo (unsigned long int req_modid)
{
size_t modid = total + cnt;
+ /* Later entries are not relevant. */
+ if (modid > max_modid)
+ break;
+
size_t gen = listp->slotinfo[cnt].gen;
if (gen > new_gen)
- /* This is a slot for a generation younger than the
- one we are handling now. It might be incompletely
- set up so ignore it. */
+ /* Not relevant. */
continue;
/* If the entry is older than the current dtv layout we
@@ -767,7 +787,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
continue;
/* Resize the dtv. */
- dtv = _dl_resize_dtv (dtv);
+ dtv = _dl_resize_dtv (dtv, max_modid);
assert (modid <= dtv[-1].counter);
@@ -789,8 +809,17 @@ _dl_update_slotinfo (unsigned long int req_modid)
}
total += listp->len;
+ if (total > max_modid)
+ break;
+
+ /* Synchronize with _dl_add_to_slotinfo. Ideally this would
+ be consume MO since we only need to order the accesses to
+ the next node after the read of the address and on most
+ hardware (other than alpha) a normal load would do that
+ because of the address dependency. */
+ listp = atomic_load_acquire (&listp->next);
}
- while ((listp = listp->next) != NULL);
+ while (listp != NULL);
/* This will be the new maximum generation counter. */
dtv[0].counter = new_gen;
@@ -982,7 +1011,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
the first slot. */
assert (idx == 0);
- listp = prevp->next = (struct dtv_slotinfo_list *)
+ listp = (struct dtv_slotinfo_list *)
malloc (sizeof (struct dtv_slotinfo_list)
+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
if (listp == NULL)
@@ -996,6 +1025,8 @@ cannot create TLS data structures"));
listp->next = NULL;
memset (listp->slotinfo, '\0',
TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+ /* Synchronize with _dl_update_slotinfo. */
+ atomic_store_release (&prevp->next, listp);
}
/* Add the information into the slotinfo data structure. */
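The release store to prevp->next above pairs with the acquire load in _dl_update_slotinfo; this is the standard list-publication pattern. A self-contained C11 sketch of just that pattern (illustrative types and a prepending insert, not the glibc slotinfo list):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node
{
  int payload;                  /* fully initialized before publication */
  struct node *_Atomic next;
};

static struct node head;        /* list head; head.next starts out NULL */

/* Writer side (the dlopen path): initialize the node first, then
   publish it with a release store.  */
static void
publish (int value)
{
  struct node *n = malloc (sizeof *n);
  n->payload = value;
  atomic_store_explicit (&n->next,
                         atomic_load_explicit (&head.next,
                                               memory_order_relaxed),
                         memory_order_relaxed);
  /* Pairs with the acquire load in walk: a reader that sees N also
     sees its initialized payload.  */
  atomic_store_explicit (&head.next, n, memory_order_release);
}

/* Reader side (the TLS access path): acquire loads order the payload
   reads after the pointer reads.  */
static void
walk (void)
{
  for (struct node *n = atomic_load_explicit (&head.next,
                                              memory_order_acquire);
       n != NULL;
       n = atomic_load_explicit (&n->next, memory_order_acquire))
    printf ("%d\n", n->payload);
}

int
main (void)
{
  publish (1);
  publish (2);
  walk ();
  return 0;
}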


@@ -0,0 +1,88 @@
commit f4f8f4d4e0f92488431b268c8cd9555730b9afe9
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Dec 30 19:19:37 2020 +0000
elf: Use relaxed atomics for racy accesses [BZ #19329]
This is a follow-up patch to the fix for bug 19329. It adds relaxed
MO atomics to accesses that were previously data races but are now
race conditions, and where relaxed MO is sufficient.
The race conditions all follow the pattern that the write is behind the
dlopen lock, but a read can happen concurrently (e.g. during TLS access)
without holding the lock. For slotinfo entries the read value only
matters if it reads from a synchronized write in dlopen or dlclose;
otherwise the related dtv entry is not valid to access, so it is fine
to leave it in an inconsistent state. The same applies to
GL(dl_tls_max_dtv_idx) and GL(dl_tls_generation), but there the
algorithm relies on the fact that the read of the last synchronized
write is an increasing value.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
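In standalone C11 terms the pattern looks like this (a sketch with illustrative names: generation stands in for GL(dl_tls_generation), and the mutex for the dlopen lock). Writers are serialized by the lock while the store itself is relaxed; readers may load without the lock and tolerate a stale value.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* "dlopen lock" */
static atomic_size_t generation;  /* stands in for GL(dl_tls_generation) */

/* Writer: serialized by the lock, so the read-modify-write logic is
   race free; only the loads/stores themselves need to be atomic
   (relaxed) because an unlocked reader may observe them concurrently. */
static void *
writer (void *arg)
{
  pthread_mutex_lock (&lock);
  size_t newgen
    = atomic_load_explicit (&generation, memory_order_relaxed) + 1;
  atomic_store_explicit (&generation, newgen, memory_order_relaxed);
  pthread_mutex_unlock (&lock);
  return NULL;
}

int
main (void)
{
  pthread_t t;
  pthread_create (&t, NULL, writer, NULL);
  /* Reader: no lock.  A stale value is tolerated by the algorithm (it
     only triggers the slow path), so relaxed MO is sufficient; the
     atomics merely remove the data race.  */
  size_t snapshot
    = atomic_load_explicit (&generation, memory_order_relaxed);
  pthread_join (t, NULL);
  printf ("snapshot=%zu final=%zu\n", snapshot,
          atomic_load_explicit (&generation, memory_order_relaxed));
  return 0;
}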
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 1ece0ae1dd062d1e..7d2dc2272cd643f5 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -79,9 +79,10 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
{
assert (old_map->l_tls_modid == idx);
- /* Mark the entry as unused. */
- listp->slotinfo[idx - disp].gen = GL(dl_tls_generation) + 1;
- listp->slotinfo[idx - disp].map = NULL;
+ /* Mark the entry as unused. These can be read concurrently. */
+ atomic_store_relaxed (&listp->slotinfo[idx - disp].gen,
+ GL(dl_tls_generation) + 1);
+ atomic_store_relaxed (&listp->slotinfo[idx - disp].map, NULL);
}
/* If this is not the last currently used entry no need to look
@@ -96,8 +97,8 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
if (listp->slotinfo[idx - disp].map != NULL)
{
- /* Found a new last used index. */
- GL(dl_tls_max_dtv_idx) = idx;
+ /* Found a new last used index. This can be read concurrently. */
+ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), idx);
return true;
}
}
@@ -571,7 +572,9 @@ _dl_close_worker (struct link_map *map, bool force)
GL(dl_tls_dtv_slotinfo_list), 0,
imap->l_init_called))
/* All dynamically loaded modules with TLS are unloaded. */
- GL(dl_tls_max_dtv_idx) = GL(dl_tls_static_nelem);
+ /* Can be read concurrently. */
+ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx),
+ GL(dl_tls_static_nelem));
if (imap->l_tls_offset != NO_TLS_OFFSET
&& imap->l_tls_offset != FORCED_DYNAMIC_TLS_OFFSET)
@@ -769,8 +772,11 @@ _dl_close_worker (struct link_map *map, bool force)
/* If we removed any object which uses TLS bump the generation counter. */
if (any_tls)
{
- if (__glibc_unlikely (++GL(dl_tls_generation) == 0))
+ size_t newgen = GL(dl_tls_generation) + 1;
+ if (__glibc_unlikely (newgen == 0))
_dl_fatal_printf ("TLS generation counter wrapped! Please report as described in "REPORT_BUGS_TO".\n");
+ /* Can be read concurrently. */
+ atomic_store_relaxed (&GL(dl_tls_generation), newgen);
if (tls_free_end == GL(dl_tls_static_used))
GL(dl_tls_static_used) = tls_free_start;
diff --git a/elf/dl-open.c b/elf/dl-open.c
index b052bb0bc2cd17aa..a67fb3aee40860e1 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -395,9 +395,12 @@ update_tls_slotinfo (struct link_map *new)
}
}
- if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+ size_t newgen = GL(dl_tls_generation) + 1;
+ if (__glibc_unlikely (newgen == 0))
_dl_fatal_printf (N_("\
TLS generation counter wrapped! Please report this."));
+ /* Can be read concurrently. */
+ atomic_store_relaxed (&GL(dl_tls_generation), newgen);
/* We need a second pass for static tls data, because
_dl_update_slotinfo must not be run while calls to
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index da83cd6ae2ee6504..801eafad3961573c 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -175,7 +175,9 @@ _dl_next_tls_modid (void)
/* No gaps, allocate a new entry. */
nogaps:
- result = ++GL(dl_tls_max_dtv_idx);
+ result = GL(dl_tls_max_dtv_idx) + 1;
+ /* Can be read concurrently. */
+ atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
}
return result;
@@ -359,10 +361,12 @@ allocate_dtv (void *result)
dtv_t *dtv;
size_t dtv_length;
+ /* Relaxed MO, because the dtv size is later rechecked, not relied on. */
+ size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
/* We allocate a few more elements in the dtv than are needed for the
initial set of modules. This should avoid in most cases expansions
of the dtv. */
- dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+ dtv_length = max_modid + DTV_SURPLUS;
dtv = calloc (dtv_length + 2, sizeof (dtv_t));
if (dtv != NULL)
{
@@ -767,7 +771,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
if (modid > max_modid)
break;
- size_t gen = listp->slotinfo[cnt].gen;
+ size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);
if (gen > new_gen)
/* Not relevant. */
@@ -779,7 +783,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
continue;
/* If there is no map this means the entry is empty. */
- struct link_map *map = listp->slotinfo[cnt].map;
+ struct link_map *map
+ = atomic_load_relaxed (&listp->slotinfo[cnt].map);
/* Check whether the current dtv array is large enough. */
if (dtv[-1].counter < modid)
{
@@ -923,7 +928,12 @@ __tls_get_addr (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+ /* Update is needed if dtv[0].counter < the generation of the accessed
+ module. The global generation counter is used here as it is easier
+ to check. Synchronization for the relaxed MO access is guaranteed
+ by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */
+ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+ if (__glibc_unlikely (dtv[0].counter != gen))
return update_get_addr (GET_ADDR_PARAM);
void *p = dtv[GET_ADDR_MODULE].pointer.val;
@@ -946,7 +956,10 @@ _dl_tls_get_addr_soft (struct link_map *l)
return NULL;
dtv_t *dtv = THREAD_DTV ();
- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+ /* This may be called without holding the GL(dl_load_lock). Reading
+ arbitrary gen value is fine since this is best effort code. */
+ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+ if (__glibc_unlikely (dtv[0].counter != gen))
{
/* This thread's DTV is not completely current,
but it might already cover this module. */
@@ -1032,7 +1045,9 @@ cannot create TLS data structures"));
/* Add the information into the slotinfo data structure. */
if (do_add)
{
- listp->slotinfo[idx].map = l;
- listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
+ /* Can be read concurrently. See _dl_update_slotinfo. */
+ atomic_store_relaxed (&listp->slotinfo[idx].map, l);
+ atomic_store_relaxed (&listp->slotinfo[idx].gen,
+ GL(dl_tls_generation) + 1);
}
}
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
index 533ee2b3a6e85ad8..bc543dcc264ea361 100644
--- a/sysdeps/x86_64/dl-tls.c
+++ b/sysdeps/x86_64/dl-tls.c
@@ -40,7 +40,8 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
- if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+ size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+ if (__glibc_unlikely (dtv[0].counter != gen))
return update_get_addr (GET_ADDR_PARAM);
return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);


@@ -0,0 +1,133 @@
commit 9d0e30329c23b5ad736fda3f174208c25970dbce
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue Dec 13 12:28:41 2016 +0000
elf: Add test case for [BZ #19329]
Test concurrent dlopen and pthread_create when the loaded modules have
TLS. This triggers dl-tls assertion failures more reliably than the
nptl/tst-stack4 test.
The dlopened module has 100 DT_NEEDED dependencies with TLS; they were
reused from an existing TLS test. The number of threads created during
dlopen depends on filesystem speed and hardware, but at most 3 threads
are alive at a time to limit resource usage.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/Makefile
(usual testing differences)
diff --git a/elf/Makefile b/elf/Makefile
index 0995d810b57d0dda..be40e3761cf91c4a 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -210,7 +210,7 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \
tst-tls-ie tst-tls-ie-dlmopen \
argv0test \
tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \
- tst-tls20
+ tst-tls20 tst-tls21
# reldep9
tests-internal += loadtest unload unload2 circleload1 \
neededtest neededtest2 neededtest3 neededtest4 \
@@ -333,7 +333,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
libmarkermod2-1 libmarkermod2-2 \
libmarkermod3-1 libmarkermod3-2 libmarkermod3-3 \
libmarkermod4-1 libmarkermod4-2 libmarkermod4-3 libmarkermod4-4 \
- tst-tls20mod-bad
+ tst-tls20mod-bad tst-tls21mod \
# Most modules build with _ISOMAC defined, but those filtered out
# depend on internal headers.
@@ -1836,3 +1836,8 @@ tst-tls20mod-bad.so-no-z-defs = yes
$(objpfx)tst-tls20: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \
$(tst-tls-many-dynamic-modules:%=$(objpfx)%.so)
+
+# Reuses tst-tls-many-dynamic-modules
+$(objpfx)tst-tls21: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls21.out: $(objpfx)tst-tls21mod.so
+$(objpfx)tst-tls21mod.so: $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so)
diff --git a/elf/tst-tls21.c b/elf/tst-tls21.c
new file mode 100644
index 0000000000000000..560bf5813a746417
--- /dev/null
+++ b/elf/tst-tls21.c
@@ -0,0 +1,68 @@
+/* Test concurrent dlopen and pthread_create: BZ 19329.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdatomic.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+
+#define THREADS 10000
+
+static atomic_int done;
+
+static void *
+start (void *a)
+{
+ /* Load a module with many dependencies that each have TLS. */
+ xdlopen ("tst-tls21mod.so", RTLD_LAZY);
+ atomic_store_explicit (&done, 1, memory_order_release);
+ return 0;
+}
+
+static void *
+nop (void *a)
+{
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ pthread_t t1, t2;
+ int i;
+
+ /* Load a module with lots of dependencies and TLS. */
+ t1 = xpthread_create (0, start, 0);
+
+ /* Concurrently create lots of threads until dlopen is observably done. */
+ for (i = 0; i < THREADS; i++)
+ {
+ if (atomic_load_explicit (&done, memory_order_acquire) != 0)
+ break;
+ t2 = xpthread_create (0, nop, 0);
+ xpthread_join (t2);
+ }
+
+ xpthread_join (t1);
+ printf ("threads created during dlopen: %d\n", i);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/elf/tst-tls21mod.c b/elf/tst-tls21mod.c
new file mode 100644
index 0000000000000000..206ece4fb34622a9
--- /dev/null
+++ b/elf/tst-tls21mod.c
@@ -0,0 +1 @@
+int __thread x;


@@ -0,0 +1,81 @@
commit 572bd547d57a39b6cf0ea072545dc4048921f4c3
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Dec 31 13:59:38 2020 +0000
elf: Fix DTV gap reuse logic [BZ #27135]
For some reason only dlopen failure caused dtv gaps to be reused.
It is possible that the intent was to never reuse modids for a
different module, but after dlopen failure all gaps are reused,
not just the ones caused by the unfinished dlopen.
So the code already has to handle reused modids, which seems to
work; however, the data races at thread creation and TLS access
(see bug 19329 and bug 27111) may be more severe if slots are
reused, so this is scheduled after those fixes. I think fixing
the races is not simpler if reuse is disallowed, and reuse has
other benefits, so set GL(dl_tls_dtv_gaps) whenever entries are
removed from the middle of the slotinfo list. The value does
not have to be correct: an incorrect true value causes the next
modid query to do a slotinfo walk, and an incorrect false value
will leave gaps and add new entries at the end.
Fixes bug 27135.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
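The hint semantics are easy to see in a standalone sketch (illustrative names: slot_used, max_idx and have_gaps stand in for slotinfo[].map, GL(dl_tls_max_dtv_idx) and GL(dl_tls_dtv_gaps); not the loader's data structures). The flag only decides whether a scan for a free slot is worth doing, so both kinds of inaccuracy are safe.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define NSLOTS 8

static bool slot_used[NSLOTS];  /* stands in for slotinfo[].map != NULL */
static size_t max_idx;          /* stands in for GL(dl_tls_max_dtv_idx) */
static bool have_gaps;          /* stands in for GL(dl_tls_dtv_gaps) */

/* An incorrectly true have_gaps only costs a futile scan; an
   incorrectly false one leaves gaps unused and appends at the end.
   Either way the assigned modid is valid.  */
static size_t
assign_modid (void)
{
  if (have_gaps)
    for (size_t i = 1; i <= max_idx; i++)
      if (!slot_used[i])
        {
          slot_used[i] = true;
          return i;
        }
  slot_used[++max_idx] = true;
  return max_idx;
}

static void
close_modid (size_t i)
{
  slot_used[i] = false;
  if (i != max_idx)
    have_gaps = true;           /* an entry was freed in the middle */
}

int
main (void)
{
  size_t a = assign_modid ();
  size_t b = assign_modid ();
  size_t c = assign_modid ();
  close_modid (b);              /* modid 2 becomes a gap */
  printf ("a=%zu b=%zu c=%zu reused=%zu\n", a, b, c, assign_modid ());
  return 0;
}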
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 7d2dc2272cd643f5..41cb6c58491c364b 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -88,7 +88,11 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
/* If this is not the last currently used entry no need to look
further. */
if (idx != GL(dl_tls_max_dtv_idx))
- return true;
+ {
+ /* There is an unused dtv entry in the middle. */
+ GL(dl_tls_dtv_gaps) = true;
+ return true;
+ }
}
while (idx - disp > (disp == 0 ? 1 + GL(dl_tls_static_nelem) : 0))
diff --git a/elf/dl-open.c b/elf/dl-open.c
index a67fb3aee40860e1..54727402750f4c0c 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -896,16 +896,6 @@ no more namespaces available for dlmopen()"));
state if relocation failed, for example. */
if (args.map)
{
- /* Maybe some of the modules which were loaded use TLS.
- Since it will be removed in the following _dl_close call
- we have to mark the dtv array as having gaps to fill the
- holes. This is a pessimistic assumption which won't hurt
- if not true. There is no need to do this when we are
- loading the auditing DSOs since TLS has not yet been set
- up. */
- if ((mode & __RTLD_AUDIT) == 0)
- GL(dl_tls_dtv_gaps) = true;
-
_dl_close_worker (args.map, true);
/* All l_nodelete_pending objects should have been deleted
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 801eafad3961573c..bacb4101e2e2c4e5 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -187,10 +187,7 @@ _dl_next_tls_modid (void)
size_t
_dl_count_modids (void)
{
- /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where
- we fail to load a module and unload it leaving a gap. If we don't
- have gaps then the number of modids is the current maximum so
- return that. */
+ /* The count is the max unless dlclose or failed dlopen created gaps. */
if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
return GL(dl_tls_max_dtv_idx);


@@ -0,0 +1,71 @@
commit 40ebfd016ad284872f434bdd76dbe9c708db4d6b
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Jun 25 08:09:08 2021 +0200
elf: Disable most of TLS modid gaps processing [BZ #27135]
Revert "elf: Fix DTV gap reuse logic [BZ #27135]"
This reverts commit 572bd547d57a39b6cf0ea072545dc4048921f4c3.
It turns out that _dl_next_tls_modid in _dl_map_object_from_fd keeps
returning the same modid over and over again if there is a gap and
more than one TLS-using module is loaded in one dlopen call. This
corrupts TLS data structures. The bug is still present after the
revert, but empirically it is much more difficult to trigger (because
it involves a dlopen failure).
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 41cb6c58491c364b..7d2dc2272cd643f5 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -88,11 +88,7 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
/* If this is not the last currently used entry no need to look
further. */
if (idx != GL(dl_tls_max_dtv_idx))
- {
- /* There is an unused dtv entry in the middle. */
- GL(dl_tls_dtv_gaps) = true;
- return true;
- }
+ return true;
}
while (idx - disp > (disp == 0 ? 1 + GL(dl_tls_static_nelem) : 0))
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 54727402750f4c0c..a67fb3aee40860e1 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -896,6 +896,16 @@ no more namespaces available for dlmopen()"));
state if relocation failed, for example. */
if (args.map)
{
+ /* Maybe some of the modules which were loaded use TLS.
+ Since it will be removed in the following _dl_close call
+ we have to mark the dtv array as having gaps to fill the
+ holes. This is a pessimistic assumption which won't hurt
+ if not true. There is no need to do this when we are
+ loading the auditing DSOs since TLS has not yet been set
+ up. */
+ if ((mode & __RTLD_AUDIT) == 0)
+ GL(dl_tls_dtv_gaps) = true;
+
_dl_close_worker (args.map, true);
/* All l_nodelete_pending objects should have been deleted
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index bacb4101e2e2c4e5..801eafad3961573c 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -187,7 +187,10 @@ _dl_next_tls_modid (void)
size_t
_dl_count_modids (void)
{
- /* The count is the max unless dlclose or failed dlopen created gaps. */
+ /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where
+ we fail to load a module and unload it leaving a gap. If we don't
+ have gaps then the number of modids is the current maximum so
+ return that. */
if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
return GL(dl_tls_max_dtv_idx);


@@ -0,0 +1,585 @@
commit ba33937be210da5d07f7f01709323743f66011ce
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Fri Jun 25 10:54:12 2021 -0300
elf: Fix DTV gap reuse logic (BZ #27135)
This is an updated version of 572bd547d57a (reverted by 40ebfd016ad2)
that fixes the _dl_next_tls_modid issues.
The issue with the 572bd547d57a patch is that the DTV entry is only
updated in dl_open_worker(), by the update_tls_slotinfo() call, after
all dependencies have been processed by _dl_map_object_deps(). However
_dl_map_object_deps() itself might call _dl_next_tls_modid(), and since
the _dl_tls_dtv_slotinfo_list::map is not yet set, the entry will be
wrongly reused.
This patch fixes it by renaming the _dl_next_tls_modid() function to
_dl_assign_tls_modid() and by passing the link_map so it can set
the slotinfo value, so that a subsequent call will
see the entry as allocated.
The intermediate value is cleared up in remove_slotinfo() for the case
where a library fails to load with RTLD_NOW.
This patch fixes BZ #27135.
Checked on x86_64-linux-gnu.
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Conflicts:
elf/Makefile
(testing differences; libdl removal upstream)
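The failure mode reproduces in miniature (a sketch with illustrative names, not the loader's data structures: slot_map stands in for slotinfo[].map). If assignment does not record an owner, two dependencies mapped in the same dlopen both receive the reused modid.

#include <stddef.h>
#include <stdio.h>

#define NSLOTS 8

static void *slot_map[NSLOTS];  /* stands in for slotinfo[].map */
static size_t max_idx = 3;      /* modids 1..3 exist */

/* Buggy shape: finds the gap but records nothing, so a second call made
   while mapping dependencies of the same dlopen finds the same gap.  */
static size_t
next_modid_buggy (void)
{
  for (size_t i = 1; i <= max_idx; i++)
    if (slot_map[i] == NULL)
      return i;
  return ++max_idx;
}

/* Fixed shape, mirroring _dl_assign_tls_modid: record the owner at
   assignment time so the entry is immediately seen as allocated.  */
static size_t
assign_modid (void *owner)
{
  for (size_t i = 1; i <= max_idx; i++)
    if (slot_map[i] == NULL)
      {
        slot_map[i] = owner;
        return i;
      }
  slot_map[++max_idx] = owner;
  return max_idx;
}

int
main (void)
{
  int m1, m3, dep_a, dep_b;     /* stand-ins for link maps */
  slot_map[1] = &m1;
  slot_map[3] = &m3;            /* modid 2 is a gap */
  size_t b1 = next_modid_buggy ();
  size_t b2 = next_modid_buggy ();
  printf ("buggy: %zu then %zu (same modid twice)\n", b1, b2);
  size_t f1 = assign_modid (&dep_a);
  size_t f2 = assign_modid (&dep_b);
  printf ("fixed: %zu then %zu\n", f1, f2);
  return 0;
}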
diff --git a/elf/Makefile b/elf/Makefile
index be40e3761cf91c4a..3e71939d3234c4c3 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -242,6 +242,13 @@ one-hundred = $(foreach x,0 1 2 3 4 5 6 7 8 9, \
0$x 1$x 2$x 3$x 4$x 5$x 6$x 7$x 8$x 9$x)
tst-tls-many-dynamic-modules := \
$(foreach n,$(one-hundred),tst-tls-manydynamic$(n)mod)
+tst-tls-many-dynamic-modules-dep-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 \
+ 14 15 16 17 18 19
+tst-tls-many-dynamic-modules-dep = \
+ $(foreach n,$(tst-tls-many-dynamic-modules-dep-suffixes),tst-tls-manydynamic$(n)mod-dep)
+tst-tls-many-dynamic-modules-dep-bad-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
+tst-tls-many-dynamic-modules-dep-bad = \
+ $(foreach n,$(tst-tls-many-dynamic-modules-dep-bad-suffixes),tst-tls-manydynamic$(n)mod-dep-bad)
extra-test-objs += $(tlsmod17a-modules:=.os) $(tlsmod18a-modules:=.os) \
tst-tlsalign-vars.o
test-extras += tst-tlsmod17a tst-tlsmod18a tst-tlsalign-vars
@@ -314,6 +321,8 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
tst-audit11mod1 tst-audit11mod2 tst-auditmod11 \
tst-audit12mod1 tst-audit12mod2 tst-audit12mod3 tst-auditmod12 \
tst-latepthreadmod $(tst-tls-many-dynamic-modules) \
+ $(tst-tls-many-dynamic-modules-dep) \
+ $(tst-tls-many-dynamic-modules-dep-bad) \
tst-nodelete-dlclose-dso tst-nodelete-dlclose-plugin \
tst-main1mod tst-libc_dlvsym-dso tst-absolute-sym-lib \
tst-absolute-zero-lib tst-big-note-lib \
@@ -1832,10 +1841,63 @@ $(objpfx)tst-rtld-help.out: $(objpfx)ld.so
$(evaluate-test)
# Reuses tst-tls-many-dynamic-modules
+$(patsubst %,$(objpfx)%.os,$(tst-tls-many-dynamic-modules-dep)): \
+ $(objpfx)tst-tls-manydynamic%mod-dep.os : tst-tls-manydynamicmod.c
+ $(compile-command.c) \
+ -DNAME=tls_global_$* -DSETTER=set_value_$* -DGETTER=get_value_$*
+$(patsubst %,$(objpfx)%.os,$(tst-tls-many-dynamic-modules-dep-bad)): \
+ $(objpfx)tst-tls-manydynamic%mod-dep-bad.os : tst-tls-manydynamicmod.c
+ $(compile-command.c) \
+ -DNAME=tls_global_$* -DSETTER=set_value_$* -DGETTER=get_value_$*
tst-tls20mod-bad.so-no-z-defs = yes
+# Single dependency.
+$(objpfx)tst-tls-manydynamic0mod-dep.so: $(objpfx)tst-tls-manydynamic1mod-dep.so
+# Double dependencies.
+$(objpfx)tst-tls-manydynamic2mod-dep.so: $(objpfx)tst-tls-manydynamic3mod-dep.so \
+ $(objpfx)tst-tls-manydynamic4mod-dep.so
+# Double dependencies with each dependency dependent on another module.
+$(objpfx)tst-tls-manydynamic5mod-dep.so: $(objpfx)tst-tls-manydynamic6mod-dep.so \
+ $(objpfx)tst-tls-manydynamic7mod-dep.so
+$(objpfx)tst-tls-manydynamic6mod-dep.so: $(objpfx)tst-tls-manydynamic8mod-dep.so
+$(objpfx)tst-tls-manydynamic7mod-dep.so: $(objpfx)tst-tls-manydynamic8mod-dep.so
+# Long chain with one double dependency in the middle
+$(objpfx)tst-tls-manydynamic9mod-dep.so: $(objpfx)tst-tls-manydynamic10mod-dep.so \
+ $(objpfx)tst-tls-manydynamic11mod-dep.so
+$(objpfx)tst-tls-manydynamic10mod-dep.so: $(objpfx)tst-tls-manydynamic12mod-dep.so
+$(objpfx)tst-tls-manydynamic12mod-dep.so: $(objpfx)tst-tls-manydynamic13mod-dep.so
+# Long chain with two double dependencies in the middle
+$(objpfx)tst-tls-manydynamic14mod-dep.so: $(objpfx)tst-tls-manydynamic15mod-dep.so
+$(objpfx)tst-tls-manydynamic15mod-dep.so: $(objpfx)tst-tls-manydynamic16mod-dep.so \
+ $(objpfx)tst-tls-manydynamic17mod-dep.so
+$(objpfx)tst-tls-manydynamic16mod-dep.so: $(objpfx)tst-tls-manydynamic18mod-dep.so \
+ $(objpfx)tst-tls-manydynamic19mod-dep.so
+# Same but with an invalid module.
+# Single dependency.
+$(objpfx)tst-tls-manydynamic0mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so
+# Double dependencies.
+$(objpfx)tst-tls-manydynamic1mod-dep-bad.so: $(objpfx)tst-tls-manydynamic2mod-dep-bad.so \
+ $(objpfx)tst-tls20mod-bad.so
+# Double dependencies with each dependency dependent on another module.
+$(objpfx)tst-tls-manydynamic3mod-dep-bad.so: $(objpfx)tst-tls-manydynamic4mod-dep-bad.so \
+ $(objpfx)tst-tls-manydynamic5mod-dep-bad.so
+$(objpfx)tst-tls-manydynamic4mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so
+$(objpfx)tst-tls-manydynamic5mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so
+# Long chain with one double dependency in the middle
+$(objpfx)tst-tls-manydynamic6mod-dep-bad.so: $(objpfx)tst-tls-manydynamic7mod-dep-bad.so \
+ $(objpfx)tst-tls-manydynamic8mod-dep-bad.so
+$(objpfx)tst-tls-manydynamic7mod-dep-bad.so: $(objpfx)tst-tls-manydynamic9mod-dep-bad.so
+$(objpfx)tst-tls-manydynamic9mod-dep-bad.so: $(objpfx)tst-tls20mod-bad.so
+# Long chain with two double dependencies in the middle
+$(objpfx)tst-tls-manydynamic10mod-dep-bad.so: $(objpfx)tst-tls-manydynamic11mod-dep-bad.so
+$(objpfx)tst-tls-manydynamic11mod-dep-bad.so: $(objpfx)tst-tls-manydynamic12mod-dep-bad.so \
+ $(objpfx)tst-tls-manydynamic13mod-dep-bad.so
+$(objpfx)tst-tls-manydynamic12mod-dep-bad.so: $(objpfx)tst-tls-manydynamic14mod-dep-bad.so \
+ $(objpfx)tst-tls20mod-bad.so
$(objpfx)tst-tls20: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \
- $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so)
+ $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so) \
+ $(tst-tls-many-dynamic-modules-dep:%=$(objpfx)%.so) \
+ $(tst-tls-many-dynamic-modules-dep-bad:%=$(objpfx)%.so) \
# Reuses tst-tls-many-dynamic-modules
$(objpfx)tst-tls21: $(libdl) $(shared-thread-library)
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 7d2dc2272cd643f5..18227fe992029364 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -77,8 +77,6 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
object that wasn't fully set up. */
if (__glibc_likely (old_map != NULL))
{
- assert (old_map->l_tls_modid == idx);
-
/* Mark the entry as unused. These can be read concurrently. */
atomic_store_relaxed (&listp->slotinfo[idx - disp].gen,
GL(dl_tls_generation) + 1);
@@ -88,7 +86,11 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
/* If this is not the last currently used entry no need to look
further. */
if (idx != GL(dl_tls_max_dtv_idx))
- return true;
+ {
+ /* There is an unused dtv entry in the middle. */
+ GL(dl_tls_dtv_gaps) = true;
+ return true;
+ }
}
while (idx - disp > (disp == 0 ? 1 + GL(dl_tls_static_nelem) : 0))
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 80fc38041a936c3c..cdb5d4b5b67f1ca1 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1419,7 +1419,7 @@ cannot enable executable stack as shared object requires");
not set up TLS data structures, so don't use them now. */
|| __glibc_likely (GL(dl_tls_dtv_slotinfo_list) != NULL)))
/* Assign the next available module ID. */
- l->l_tls_modid = _dl_next_tls_modid ();
+ _dl_assign_tls_modid (l);
#ifdef DL_AFTER_LOAD
DL_AFTER_LOAD (l);
diff --git a/elf/dl-open.c b/elf/dl-open.c
index a67fb3aee40860e1..54727402750f4c0c 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -896,16 +896,6 @@ no more namespaces available for dlmopen()"));
state if relocation failed, for example. */
if (args.map)
{
- /* Maybe some of the modules which were loaded use TLS.
- Since it will be removed in the following _dl_close call
- we have to mark the dtv array as having gaps to fill the
- holes. This is a pessimistic assumption which won't hurt
- if not true. There is no need to do this when we are
- loading the auditing DSOs since TLS has not yet been set
- up. */
- if ((mode & __RTLD_AUDIT) == 0)
- GL(dl_tls_dtv_gaps) = true;
-
_dl_close_worker (args.map, true);
/* All l_nodelete_pending objects should have been deleted
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 801eafad3961573c..8c0f9e972d7a0eac 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -122,8 +122,8 @@ oom (void)
}
-size_t
-_dl_next_tls_modid (void)
+void
+_dl_assign_tls_modid (struct link_map *l)
{
size_t result;
@@ -153,7 +153,11 @@ _dl_next_tls_modid (void)
}
if (result - disp < runp->len)
- break;
+ {
+ /* Mark the entry as used, so any dependency sees it. */
+ atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
+ break;
+ }
disp += runp->len;
}
@@ -180,17 +184,14 @@ _dl_next_tls_modid (void)
atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
}
- return result;
+ l->l_tls_modid = result;
}
size_t
_dl_count_modids (void)
{
- /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where
- we fail to load a module and unload it leaving a gap. If we don't
- have gaps then the number of modids is the current maximum so
- return that. */
+ /* The count is the max unless dlclose or failed dlopen created gaps. */
if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
return GL(dl_tls_max_dtv_idx);
diff --git a/elf/rtld.c b/elf/rtld.c
index 992f825ba00762a7..118c454a2329573f 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1693,7 +1693,7 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]);
/* Add the dynamic linker to the TLS list if it also uses TLS. */
if (GL(dl_rtld_map).l_tls_blocksize != 0)
/* Assign a module ID. Do this before loading any audit modules. */
- GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid ();
+ _dl_assign_tls_modid (&GL(dl_rtld_map));
audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_AUDIT);
audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_DEPAUDIT);
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
index 9977ec803208b9c8..d8d04fe574597f35 100644
--- a/elf/tst-tls20.c
+++ b/elf/tst-tls20.c
@@ -16,12 +16,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <array_length.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <support/check.h>
#include <support/support.h>
+#include <support/test-driver.h>
#include <support/xdlfcn.h>
#include <support/xthread.h>
@@ -59,28 +61,75 @@ access (int i)
char *buf = xasprintf ("tls_global_%02d", i);
dlerror ();
int *p = dlsym (mod[i], buf);
- printf ("mod[%d]: &tls = %p\n", i, p);
+ if (test_verbose)
+ printf ("mod[%d]: &tls = %p\n", i, p);
if (p == NULL)
FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ());
+ TEST_COMPARE (*p, 0);
++*p;
free (buf);
}
+static void
+access_mod (const char *modname, void *mod, int i)
+{
+ char *modsym = xasprintf ("tls_global_%d", i);
+ dlerror ();
+ int *p = dlsym (mod, modsym);
+ if (test_verbose)
+ printf ("%s: &tls = %p\n", modname, p);
+ if (p == NULL)
+ FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ());
+ TEST_COMPARE (*p, 0);
+ ++*p;
+ free (modsym);
+}
+
+static void
+access_dep (int i)
+{
+ char *modname = xasprintf ("tst-tls-manydynamic%dmod-dep.so", i);
+ void *moddep = xdlopen (modname, RTLD_LAZY);
+ access_mod (modname, moddep, i);
+ free (modname);
+ xdlclose (moddep);
+}
+
+struct start_args
+{
+ const char *modname;
+ void *mod;
+ int modi;
+ int ndeps;
+ const int *deps;
+};
+
static void *
start (void *a)
{
+ struct start_args *args = a;
+
for (int i = 0; i < NMOD; i++)
if (mod[i] != NULL)
access (i);
+
+ if (args != NULL)
+ {
+ access_mod (args->modname, args->mod, args->modi);
+ for (int n = 0; n < args->ndeps; n++)
+ access_dep (args->deps[n]);
+ }
+
return 0;
}
-static int
-do_test (void)
+/* This test checks gaps with shared libraries with dynamic TLS that have no
+ dependencies. The DTV gap is created by trying to load an invalid
+ module; the entry should be used by the dlopen. */
+static void
+do_test_no_depedency (void)
{
- int i;
-
- for (i = 0; i < NMOD; i++)
+ for (int i = 0; i < NMOD; i++)
{
load_mod (i);
/* Bump the generation of mod[0] without using new dtv slot. */
@@ -91,8 +140,220 @@ do_test (void)
pthread_t t = xpthread_create (0, start, 0);
xpthread_join (t);
}
- for (i = 0; i < NMOD; i++)
+ for (int i = 0; i < NMOD; i++)
unload_mod (i);
+}
+
+/* The following test checks DTV gap handling with shared libraries that have
+ dependencies. It defines 5 different sets:
+
+ 1. Single dependency:
+ mod0 -> mod1
+ 2. Double dependency:
+ mod2 -> [mod3,mod4]
+ 3. Double dependency with each dependency dependent on another module:
+ mod5 -> [mod6,mod7] -> mod8
+ 4. Long chain with one double dependency in the middle:
+ mod9 -> [mod10, mod11] -> mod12 -> mod13
+ 5. Long chain with two double dependencies in the middle:
+ mod14 -> mod15 -> [mod16, mod17]
+ mod15 -> [mod18, mod19]
+
+ This does not cover all the possible gaps and configurations, but it
+ should check whether different dynamic shared sets are placed correctly
+ in different gap configurations. */
+
+static int
+nmodules (uint32_t v)
+{
+ unsigned int r = 0;
+ while (v >>= 1)
+ r++;
+ return r + 1;
+}
+
+static inline bool
+is_mod_set (uint32_t g, uint32_t n)
+{
+ return (1U << (n - 1)) & g;
+}
+
+static void
+print_gap (uint32_t g)
+{
+ if (!test_verbose)
+ return;
+ printf ("gap: ");
+ int nmods = nmodules (g);
+ for (int n = 1; n <= nmods; n++)
+ printf ("%c", ((1 << (n - 1)) & g) == 0 ? 'G' : 'M');
+ printf ("\n");
+}
+
+static void
+do_test_dependency (void)
+{
+ /* Maps the module and its dependencies, use thread to access the TLS on
+ each loaded module. */
+ static const int tlsmanydeps0[] = { 1 };
+ static const int tlsmanydeps1[] = { 3, 4 };
+ static const int tlsmanydeps2[] = { 6, 7, 8 };
+ static const int tlsmanydeps3[] = { 10, 11, 12 };
+ static const int tlsmanydeps4[] = { 15, 16, 17, 18, 19 };
+ static const struct tlsmanydeps_t
+ {
+ int modi;
+ int ndeps;
+ const int *deps;
+ } tlsmanydeps[] =
+ {
+ { 0, array_length (tlsmanydeps0), tlsmanydeps0 },
+ { 2, array_length (tlsmanydeps1), tlsmanydeps1 },
+ { 5, array_length (tlsmanydeps2), tlsmanydeps2 },
+ { 9, array_length (tlsmanydeps3), tlsmanydeps3 },
+ { 14, array_length (tlsmanydeps4), tlsmanydeps4 },
+ };
+
+ /* The gap configuration is defined as a bitmap: a set bit represents a
+ module loaded prior to the test execution, while an unset bit is an
+ unloaded module. Not all permutations will show gaps, but it is simpler
+ than defining each one independently. */
+ for (uint32_t g = 0; g < 64; g++)
+ {
+ print_gap (g);
+ int nmods = nmodules (g);
+
+ int mods[nmods];
+ /* We use '0' as indication for a gap, to avoid the dlclose on iteration
+ cleanup. */
+ for (int n = 1; n <= nmods; n++)
+ {
+ load_mod (n);
+ mods[n] = n;
+ }
+ for (int n = 1; n <= nmods; n++)
+ {
+ if (!is_mod_set (g, n))
+ {
+ unload_mod (n);
+ mods[n] = 0;
+ }
+ }
+
+ for (int t = 0; t < array_length (tlsmanydeps); t++)
+ {
+ char *moddepname = xasprintf ("tst-tls-manydynamic%dmod-dep.so",
+ tlsmanydeps[t].modi);
+ void *moddep = xdlopen (moddepname, RTLD_LAZY);
+
+ /* Access TLS in all loaded modules. */
+ struct start_args args =
+ {
+ moddepname,
+ moddep,
+ tlsmanydeps[t].modi,
+ tlsmanydeps[t].ndeps,
+ tlsmanydeps[t].deps
+ };
+ pthread_t t = xpthread_create (0, start, &args);
+ xpthread_join (t);
+
+ free (moddepname);
+ xdlclose (moddep);
+ }
+
+ for (int n = 1; n <= nmods; n++)
+ if (mods[n] != 0)
+ unload_mod (n);
+ }
+}
+
+/* The following test checks DTV gap handling with shared libraries that have
+ invalid dependencies. It defines 5 different sets:
+
+ 1. Single dependency:
+ mod0 -> invalid
+ 2. Double dependency:
+ mod1 -> [mod2,invalid]
+ 3. Double dependency with each dependency dependent on another module:
+ mod3 -> [mod4,mod5] -> invalid
+ 4. Long chain with one double dependency in the middle:
+ mod6 -> [mod7, mod8] -> mod12 -> invalid
+ 5. Long chain with two double dependencies in the middle:
+ mod10 -> mod11 -> [mod12, mod13]
+ mod12 -> [mod14, invalid]
+
+ This does not cover all the possible gaps and configurations, but it
+ should check whether different dynamic shared sets are placed correctly
+ in different gap configurations. */
+
+static void
+do_test_invalid_dependency (bool bind_now)
+{
+ static const int tlsmanydeps[] = { 0, 1, 3, 6, 10 };
+
+ /* The gap configuration is defined as a bitmap: a set bit represents a
+ module loaded prior to the test execution, while an unset bit is an
+ unloaded module. Not all permutations will show gaps, but it is simpler
+ than defining each one independently. */
+ for (uint32_t g = 0; g < 64; g++)
+ {
+ print_gap (g);
+ int nmods = nmodules (g);
+
+ int mods[nmods];
+ /* We use '0' as indication for a gap, to avoid the dlclose on iteration
+ cleanup. */
+ for (int n = 1; n <= nmods; n++)
+ {
+ load_mod (n);
+ mods[n] = n;
+ }
+ for (int n = 1; n <= nmods; n++)
+ {
+ if (!is_mod_set (g, n))
+ {
+ unload_mod (n);
+ mods[n] = 0;
+ }
+ }
+
+ for (int t = 0; t < array_length (tlsmanydeps); t++)
+ {
+ char *moddepname = xasprintf ("tst-tls-manydynamic%dmod-dep-bad.so",
+ tlsmanydeps[t]);
+ void *moddep;
+ if (bind_now)
+ {
+ moddep = dlopen (moddepname, RTLD_NOW);
+ TEST_VERIFY (moddep == 0);
+ }
+ else
+ moddep = dlopen (moddepname, RTLD_LAZY);
+
+ /* Access TLS in all loaded modules. */
+ pthread_t t = xpthread_create (0, start, NULL);
+ xpthread_join (t);
+
+ free (moddepname);
+ if (!bind_now)
+ xdlclose (moddep);
+ }
+
+ for (int n = 1; n <= nmods; n++)
+ if (mods[n] != 0)
+ unload_mod (n);
+ }
+}
+
+static int
+do_test (void)
+{
+ do_test_no_depedency ();
+ do_test_dependency ();
+ do_test_invalid_dependency (true);
+ do_test_invalid_dependency (false);
+
return 0;
}
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 6cbbaa808a596f77..0138353ccb41c5f1 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1111,8 +1111,8 @@ extern ElfW(Addr) _dl_sysdep_start (void **start_argptr,
extern void _dl_sysdep_start_cleanup (void) attribute_hidden;
-/* Determine next available module ID. */
-extern size_t _dl_next_tls_modid (void) attribute_hidden;
+/* Determine next available module ID and set the L l_tls_modid. */
+extern void _dl_assign_tls_modid (struct link_map *l) attribute_hidden;
/* Count the modules with TLS segments. */
extern size_t _dl_count_modids (void) attribute_hidden;


@@ -0,0 +1,42 @@
commit 881b68e45c3a518319dcf5a3c4a2b3ec59e1c1e5
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Fri Jul 16 08:32:05 2021 -0300
elf: Fix a wrong array access on tst-tls20
The gap-setup loops iterated with n <= nmods while indexing the
int mods[nmods] array, writing one element past its end.
Checked on x86_64-linux-gnu with --enable-stack-protector=all.
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
index d8d04fe574597f35..831c3336c914790d 100644
--- a/elf/tst-tls20.c
+++ b/elf/tst-tls20.c
@@ -226,12 +226,12 @@ do_test_dependency (void)
int mods[nmods];
/* We use '0' as indication for a gap, to avoid the dlclose on iteration
cleanup. */
- for (int n = 1; n <= nmods; n++)
+ for (int n = 1; n < nmods; n++)
{
load_mod (n);
mods[n] = n;
}
- for (int n = 1; n <= nmods; n++)
+ for (int n = 1; n < nmods; n++)
{
if (!is_mod_set (g, n))
{
@@ -304,12 +304,12 @@ do_test_invalid_dependency (bool bind_now)
int mods[nmods];
/* We use '0' as indication for a gap, to avoid the dlclose on iteration
cleanup. */
- for (int n = 1; n <= nmods; n++)
+ for (int n = 1; n < nmods; n++)
{
load_mod (n);
mods[n] = n;
}
- for (int n = 1; n <= nmods; n++)
+ for (int n = 1; n < nmods; n++)
{
if (!is_mod_set (g, n))
{


@@ -0,0 +1,468 @@
commit 83b5323261bb72313bffcf37476c1b8f0847c736
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Sep 15 15:16:19 2021 +0100
elf: Avoid deadlock between pthread_create and ctors [BZ #28357]
The fix for bug 19329 caused a regression such that pthread_create can
deadlock when concurrent ctors from dlopen are waiting for it to finish.
Use a new GL(dl_load_tls_lock) in pthread_create that is not taken
around ctors in dlopen.
The new lock is also used in __tls_get_addr instead of GL(dl_load_lock).
The new lock is held in _dl_open_worker and _dl_close_worker around
most of the logic before/after the init/fini routines. When init/fini
routines are running then TLS is in a consistent, usable state.
In _dl_open_worker the new lock requires catching and reraising dlopen
failures that happen in the critical section.
The new lock is reinitialized in a fork child to keep the existing
behaviour, and it is kept recursive in case malloc interposition or TLS
access from signal handlers needs to retake it. It is not obvious whether
this is necessary or helps, but it avoids changing the preexisting
behaviour.
The new lock may also be more appropriate for dl_iterate_phdr than
GL(dl_load_write_lock), since the TLS state of an incompletely loaded
module may be accessed. If the new lock can replace the old one,
that can be a separate change.
Fixes bug 28357.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
posix/fork.c
(reworked due to file rename upstream and libpthread integration)
sysdeps/pthread/Makefile
(htl testing support was missing downstream, reconstituted here;
added $(libdl) required downstream)
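The resulting lock ordering can be sketched standalone (illustrative names; the real locks are recursive rtld locks): dlopen holds GL(dl_load_lock) across ctors but drops the new TLS lock before ctors run, while pthread_create only needs the TLS lock, so the ctor/user-lock inversion can no longer close a cycle.

#include <pthread.h>
#include <stdio.h>

/* Stand-ins for GL(dl_load_lock), GL(dl_load_tls_lock) and a lock taken
   by user code in a constructor.  */
static pthread_mutex_t load_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t load_tls_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER;

static void *
create_thread_path (void *arg)
{
  pthread_mutex_lock (&user_lock);
  /* pthread_create only needs the TLS lock for DTV setup; it never
     waits for the load lock, which a concurrent dlopen may hold
     across ctors.  */
  pthread_mutex_lock (&load_tls_lock);
  pthread_mutex_unlock (&load_tls_lock);
  pthread_mutex_unlock (&user_lock);
  return NULL;
}

static void
dlopen_path (void)
{
  pthread_mutex_lock (&load_lock);       /* held across the whole dlopen */
  pthread_mutex_lock (&load_tls_lock);   /* TLS setup only */
  /* ... relocations, slotinfo and generation updates ... */
  pthread_mutex_unlock (&load_tls_lock); /* dropped before ctors run */
  /* ctor: may block on a user lock, but no longer excludes DTV setup. */
  pthread_mutex_lock (&user_lock);
  pthread_mutex_unlock (&user_lock);
  pthread_mutex_unlock (&load_lock);
}

int
main (void)
{
  pthread_t t;
  pthread_create (&t, NULL, create_thread_path, NULL);
  dlopen_path ();
  pthread_join (t, NULL);
  puts ("no deadlock");
  return 0;
}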
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 18227fe992029364..7fe91bdd9aaf694e 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -549,6 +549,9 @@ _dl_close_worker (struct link_map *map, bool force)
size_t tls_free_end;
tls_free_start = tls_free_end = NO_TLS_OFFSET;
+ /* Protects global and module specific TLS state. */
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
+
/* We modify the list of loaded objects. */
__rtld_lock_lock_recursive (GL(dl_load_write_lock));
@@ -784,6 +787,9 @@ _dl_close_worker (struct link_map *map, bool force)
GL(dl_tls_static_used) = tls_free_start;
}
+ /* TLS is cleaned up for the unloaded modules. */
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
+
#ifdef SHARED
/* Auditing checkpoint: we have deleted all objects. */
if (__glibc_unlikely (do_audit))
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 54727402750f4c0c..736df62ce6e46d34 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -65,6 +65,9 @@ struct dl_open_args
libc_map value in the namespace in case of a dlopen failure. */
bool libc_already_loaded;
+ /* Set to true if the end of dl_open_worker_begin was reached. */
+ bool worker_continue;
+
/* Original parameters to the program and the current environment. */
int argc;
char **argv;
@@ -481,7 +484,7 @@ call_dl_init (void *closure)
}
static void
-dl_open_worker (void *a)
+dl_open_worker_begin (void *a)
{
struct dl_open_args *args = a;
const char *file = args->file;
@@ -772,6 +775,36 @@ dl_open_worker (void *a)
DL_STATIC_INIT (new);
#endif
+ args->worker_continue = true;
+}
+
+static void
+dl_open_worker (void *a)
+{
+ struct dl_open_args *args = a;
+
+ args->worker_continue = false;
+
+ {
+ /* Protects global and module specific TLS state. */
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
+
+ struct dl_exception ex;
+ int err = _dl_catch_exception (&ex, dl_open_worker_begin, args);
+
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
+
+ if (__glibc_unlikely (ex.errstring != NULL))
+ /* Reraise the error. */
+ _dl_signal_exception (err, &ex, NULL);
+ }
+
+ if (!args->worker_continue)
+ return;
+
+ int mode = args->mode;
+ struct link_map *new = args->map;
+
/* Run the initializer functions of new objects. Temporarily
disable the exception handler, so that lazy binding failures are
fatal. */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 34be8e5babfb6af3..3e5531138eaa18f8 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -212,6 +212,13 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock)
list of loaded objects while an object is added to or removed from
that list. */
__rtld_lock_define_initialized_recursive (, _dl_load_write_lock)
+ /* This lock protects global and module specific TLS related data.
+ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
+ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
+ accessed and when TLS related relocations are processed for a
+ module. It was introduced to keep pthread_create from accessing
+ TLS state that is being set up.
+__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock)
#ifdef HAVE_AUX_VECTOR
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 8c0f9e972d7a0eac..7865fc390c3f3f0a 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -527,7 +527,7 @@ _dl_allocate_tls_init (void *result)
size_t maxgen = 0;
/* Protects global dynamic TLS related state. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
/* Check if the current dtv is big enough. */
if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
@@ -601,7 +601,7 @@ _dl_allocate_tls_init (void *result)
listp = listp->next;
assert (listp != NULL);
}
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
/* The DTV version is up-to-date now. */
dtv[0].counter = maxgen;
@@ -740,7 +740,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
Here the dtv needs to be updated to new_gen generation count.
- This code may be called during TLS access when GL(dl_load_lock)
+ This code may be called during TLS access when GL(dl_load_tls_lock)
is not held. In that case the user code has to synchronize with
dlopen and dlclose calls of relevant modules. A module m is
relevant if the generation of m <= new_gen and dlclose of m is
@@ -862,11 +862,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
if (__glibc_unlikely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
{
- __rtld_lock_lock_recursive (GL(dl_load_lock));
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
{
the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
}
else if (__glibc_likely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
@@ -878,7 +878,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
dtv[GET_ADDR_MODULE].pointer.val = p;
@@ -886,7 +886,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
return (char *) p + GET_ADDR_OFFSET;
}
else
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
}
struct dtv_pointer result = allocate_and_init (the_map);
dtv[GET_ADDR_MODULE].pointer = result;
@@ -957,7 +957,7 @@ _dl_tls_get_addr_soft (struct link_map *l)
return NULL;
dtv_t *dtv = THREAD_DTV ();
- /* This may be called without holding the GL(dl_load_lock). Reading
+ /* This may be called without holding the GL(dl_load_tls_lock). Reading
arbitrary gen value is fine since this is best effort code. */
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
if (__glibc_unlikely (dtv[0].counter != gen))
diff --git a/elf/rtld.c b/elf/rtld.c
index 118c454a2329573f..9e09896da078274d 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -317,6 +317,7 @@ struct rtld_global _rtld_global =
#ifdef _LIBC_REENTRANT
._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
+ ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
#endif
._dl_nns = 1,
._dl_ns =
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 0138353ccb41c5f1..7b0a667629ddc06a 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -373,6 +373,13 @@ struct rtld_global
list of loaded objects while an object is added to or removed
from that list. */
__rtld_lock_define_recursive (EXTERN, _dl_load_write_lock)
+ /* This lock protects global and module specific TLS related data.
+ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
+ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
+ accessed and when TLS related relocations are processed for a
+ module. It was introduced to keep pthread_create from accessing
+ TLS state that is being set up.
+ __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock)
/* Incremented whenever something may have been added to dl_loaded. */
EXTERN unsigned long long _dl_load_adds;
@@ -1192,7 +1199,7 @@ extern int _dl_scope_free (void *) attribute_hidden;
/* Add module to slot information data. If DO_ADD is false, only the
required memory is allocated. Must be called with GL
- (dl_load_lock) acquired. If the function has already been called
+ (dl_load_tls_lock) acquired. If the function has already been called
for the link map L with !do_add, then this function will not raise
an exception, otherwise it is possible that it encounters a memory
allocation failure. */
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
index 37db30f3d1e846b6..b4d20fa652f4ba3b 100644
--- a/sysdeps/nptl/fork.c
+++ b/sysdeps/nptl/fork.c
@@ -125,6 +125,9 @@ __libc_fork (void)
/* Reset the lock the dynamic loader uses to protect its data. */
__rtld_lock_initialize (GL(dl_load_lock));
+ /* Reset the lock protecting dynamic TLS related data. */
+ __rtld_lock_initialize (GL(dl_load_tls_lock));
+
/* Run the handlers registered for the child. */
__run_fork_handlers (atfork_run_child, multiple_threads);
}
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index ea4f8894891b2636..98a92f8d6bb119ba 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -25,3 +25,24 @@ $(objpfx)tst-timer: $(objpfx)librt.a $(static-thread-library)
endif
endif
+
+ifneq (,$(filter $(subdir),htl nptl))
+ifeq ($(build-shared),yes)
+tests += tst-create1
+endif
+
+tst-create1mod.so-no-z-defs = yes
+
+ifeq ($(build-shared),yes)
+# Build all the modules even when not actually running test programs.
+tests: $(test-modules)
+endif
+
+modules-names += tst-create1mod
+test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
+
+LDFLAGS-tst-create1 = -Wl,-export-dynamic
+$(objpfx)tst-create1: $(libdl) $(shared-thread-library)
+$(objpfx)tst-create1.out: $(objpfx)tst-create1mod.so
+
+endif
diff --git a/sysdeps/pthread/tst-create1.c b/sysdeps/pthread/tst-create1.c
new file mode 100644
index 0000000000000000..932586c30990d1d4
--- /dev/null
+++ b/sysdeps/pthread/tst-create1.c
@@ -0,0 +1,119 @@
+/* Verify that pthread_create does not deadlock when ctors take locks.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+
+/*
+Check if ctor and pthread_create deadlock in
+
+thread 1: dlopen -> ctor -> lock(user_lock)
+thread 2: lock(user_lock) -> pthread_create
+
+or in
+
+thread 1: dlclose -> dtor -> lock(user_lock)
+thread 2: lock(user_lock) -> pthread_create
+*/
+
+static pthread_barrier_t bar_ctor;
+static pthread_barrier_t bar_dtor;
+static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void
+ctor (void)
+{
+ xpthread_barrier_wait (&bar_ctor);
+ dprintf (1, "thread 1: in ctor: started.\n");
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 1: in ctor: locked user_lock.\n");
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 1: in ctor: unlocked user_lock.\n");
+ dprintf (1, "thread 1: in ctor: done.\n");
+}
+
+void
+dtor (void)
+{
+ xpthread_barrier_wait (&bar_dtor);
+ dprintf (1, "thread 1: in dtor: started.\n");
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 1: in dtor: locked user_lock.\n");
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 1: in dtor: unlocked user_lock.\n");
+ dprintf (1, "thread 1: in dtor: done.\n");
+}
+
+static void *
+thread3 (void *a)
+{
+ dprintf (1, "thread 3: started.\n");
+ dprintf (1, "thread 3: done.\n");
+ return 0;
+}
+
+static void *
+thread2 (void *a)
+{
+ pthread_t t3;
+ dprintf (1, "thread 2: started.\n");
+
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 2: locked user_lock.\n");
+ xpthread_barrier_wait (&bar_ctor);
+ t3 = xpthread_create (0, thread3, 0);
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 2: unlocked user_lock.\n");
+ xpthread_join (t3);
+
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 2: locked user_lock.\n");
+ xpthread_barrier_wait (&bar_dtor);
+ t3 = xpthread_create (0, thread3, 0);
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 2: unlocked user_lock.\n");
+ xpthread_join (t3);
+
+ dprintf (1, "thread 2: done.\n");
+ return 0;
+}
+
+static void
+thread1 (void)
+{
+ dprintf (1, "thread 1: started.\n");
+ xpthread_barrier_init (&bar_ctor, NULL, 2);
+ xpthread_barrier_init (&bar_dtor, NULL, 2);
+ pthread_t t2 = xpthread_create (0, thread2, 0);
+ void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL);
+ dprintf (1, "thread 1: dlopen done.\n");
+ xdlclose (p);
+ dprintf (1, "thread 1: dlclose done.\n");
+ xpthread_join (t2);
+ dprintf (1, "thread 1: done.\n");
+}
+
+static int
+do_test (void)
+{
+ thread1 ();
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/pthread/tst-create1mod.c b/sysdeps/pthread/tst-create1mod.c
new file mode 100644
index 0000000000000000..62c9006961683177
--- /dev/null
+++ b/sysdeps/pthread/tst-create1mod.c
@@ -0,0 +1,41 @@
+/* Verify that pthread_create does not deadlock when ctors take locks.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+/* Require TLS setup for the module. */
+__thread int tlsvar;
+
+void ctor (void);
+void dtor (void);
+
+static void __attribute__ ((constructor))
+do_init (void)
+{
+ dprintf (1, "constructor started: %d.\n", tlsvar++);
+ ctor ();
+ dprintf (1, "constructor done: %d.\n", tlsvar++);
+}
+
+static void __attribute__ ((destructor))
+do_end (void)
+{
+ dprintf (1, "destructor started: %d.\n", tlsvar++);
+ dtor ();
+ dprintf (1, "destructor done: %d.\n", tlsvar++);
+}


@@ -0,0 +1,28 @@
commit d2b997c7172e9a00895a9deb379f8782fbd2e36f
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Dec 30 23:40:14 2020 +0000
elf: Fix a DTV setup issue [BZ #27136]
The max modid is a valid index in the dtv; it should not be skipped.
The bug is observable if the last module has modid == 64 and its
generation is the same as or less than the max generation of the
previous modules. Then dtv[0].counter implies dtv[64] is initialized
but it isn't. Fixes bug 27136.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
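A worked sketch of the off-by-one (a simplified standalone model of the chunk walk in _dl_allocate_tls_init, with illustrative names and a chunk length of 64 like the first slotinfo block): modid 64 lives in the second chunk, and with '>=' the walk stops before that chunk is visited.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define CHUNK 64  /* slotinfo entries per list block, as in the first block */

/* strict selects the fixed 'total > max_modid' condition.  Returns
   whether the chunk containing max_modid is ever visited.  */
static bool
visits_max_modid (size_t max_modid, bool strict)
{
  size_t total = 0;  /* first modid covered by the current chunk */
  for (;;)
    {
      /* This chunk covers modids [total, total + CHUNK).  */
      if (max_modid >= total && max_modid < total + CHUNK)
        return true;
      total += CHUNK;
      if (strict ? total > max_modid : total >= max_modid)
        /* The walk stops here; any later dtv entries stay uninitialized
           even though dtv[0].counter will claim otherwise.  */
        return false;
    }
}

int
main (void)
{
  printf ("max_modid=64 with '>=': visited=%d\n",
          (int) visits_max_modid (64, false));
  printf ("max_modid=64 with '>':  visited=%d\n",
          (int) visits_max_modid (64, true));
  return 0;
}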
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index cccf74b33481b866..0b96b1dceed99d58 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -590,7 +590,7 @@ _dl_allocate_tls_init (void *result)
}
total += cnt;
- if (total >= GL(dl_tls_max_dtv_idx))
+ if (total > GL(dl_tls_max_dtv_idx))
break;
listp = listp->next;


@@ -0,0 +1,20 @@
commit 3c7c5117826816021f9d3f352f49e0dd0236cbad
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Nov 30 14:35:54 2021 +0100
elf: Include <stdint.h> in tst-tls20.c
The test uses standard integer types.
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
index 831c3336c914790d..18067e6b0a6093f9 100644
--- a/elf/tst-tls20.c
+++ b/elf/tst-tls20.c
@@ -19,6 +19,7 @@
#include <array_length.h>
#include <dlfcn.h>
#include <pthread.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <support/check.h>


@ -0,0 +1,20 @@
commit df4cb2280e32187380520f71bd27ab32252cbc85
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Nov 30 15:39:17 2021 +0100
elf: Include <stdbool.h> in tst-tls20.c
The test uses the bool type.
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
index 18067e6b0a6093f9..200dacb748af21a8 100644
--- a/elf/tst-tls20.c
+++ b/elf/tst-tls20.c
@@ -19,6 +19,7 @@
#include <array_length.h>
#include <dlfcn.h>
#include <pthread.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

View File

@ -0,0 +1,62 @@
commit 5cc338565479a620244c2f8ff35956629c4dbf81
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Dec 10 05:14:24 2021 +0100
nptl: Add one more barrier to nptl/tst-create1
Without the bar_ctor_finish barrier, it was possible that thread2
re-locked user_lock before ctor had a chance to lock it. ctor then
blocked in its locking operation, xdlopen from the main thread
did not return, and thread2 was stuck waiting in bar_dtor:
thread 1: started.
thread 2: started.
thread 2: locked user_lock.
constructor started: 0.
thread 1: in ctor: started.
thread 3: started.
thread 3: done.
thread 2: unlocked user_lock.
thread 2: locked user_lock.
Fixes the test in commit 83b5323261bb72313bffcf37476c1b8f0847c736
("elf: Avoid deadlock between pthread_create and ctors [BZ #28357]").
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
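A minimal self-contained illustration of the idiom the fix relies on, separate from the test itself (thread names and messages here are invented): a two-party barrier guarantees that one thread's second lock acquisition strictly follows the other thread's critical section.

#include <pthread.h>
#include <stdio.h>

static pthread_barrier_t bar;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *
thread_b (void *arg)
{
  pthread_mutex_lock (&lock);
  puts ("B: first critical section");
  pthread_mutex_unlock (&lock);
  /* Cannot pass until A has also arrived, i.e. after A unlocked.  */
  pthread_barrier_wait (&bar);
  pthread_mutex_lock (&lock);
  puts ("B: second critical section (always after A's)");
  pthread_mutex_unlock (&lock);
  return NULL;
}

int
main (void)
{
  pthread_barrier_init (&bar, NULL, 2);
  pthread_t t;
  pthread_create (&t, NULL, thread_b, NULL);
  pthread_mutex_lock (&lock);
  puts ("A: critical section");
  pthread_mutex_unlock (&lock);
  pthread_barrier_wait (&bar);   /* releases B's second acquisition */
  pthread_join (t, NULL);
  pthread_barrier_destroy (&bar);
  return 0;
}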
diff --git a/sysdeps/pthread/tst-create1.c b/sysdeps/pthread/tst-create1.c
index 932586c30990d1d4..763ded8d7956f943 100644
--- a/sysdeps/pthread/tst-create1.c
+++ b/sysdeps/pthread/tst-create1.c
@@ -33,6 +33,7 @@ thread 2: lock(user_lock) -> pthread_create
*/
static pthread_barrier_t bar_ctor;
+static pthread_barrier_t bar_ctor_finish;
static pthread_barrier_t bar_dtor;
static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -46,6 +47,7 @@ ctor (void)
xpthread_mutex_unlock (&user_lock);
dprintf (1, "thread 1: in ctor: unlocked user_lock.\n");
dprintf (1, "thread 1: in ctor: done.\n");
+ xpthread_barrier_wait (&bar_ctor_finish);
}
void
@@ -81,6 +83,7 @@ thread2 (void *a)
xpthread_mutex_unlock (&user_lock);
dprintf (1, "thread 2: unlocked user_lock.\n");
xpthread_join (t3);
+ xpthread_barrier_wait (&bar_ctor_finish);
xpthread_mutex_lock (&user_lock);
dprintf (1, "thread 2: locked user_lock.\n");
@@ -99,6 +102,7 @@ thread1 (void)
{
dprintf (1, "thread 1: started.\n");
xpthread_barrier_init (&bar_ctor, NULL, 2);
+ xpthread_barrier_init (&bar_ctor_finish, NULL, 2);
xpthread_barrier_init (&bar_dtor, NULL, 2);
pthread_t t2 = xpthread_create (0, thread2, 0);
void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL);

View File

@ -0,0 +1,163 @@
commit 8f85075a2e9c26ff7486d4bbaf358999807d215c
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Dec 31 12:24:38 2020 +0000
elf: Add a DTV setup test [BZ #27136]
The test dlopens a large number of modules with TLS; they are reused
from an existing test.
The test relies on the reuse of slotinfo entries after dlclose; without
bug 27135 fixed, this needs a failing dlopen. With a slotinfo list
whose generation counters are not monotonically increasing, bug 27136
can trigger.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/Makefile
(usual test differences)
diff --git a/elf/Makefile b/elf/Makefile
index 82fb019a634caf81..0995d810b57d0dda 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -209,7 +209,8 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \
tst-audit14 tst-audit15 tst-audit16 \
tst-tls-ie tst-tls-ie-dlmopen \
argv0test \
- tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask
+ tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \
+ tst-tls20
# reldep9
tests-internal += loadtest unload unload2 circleload1 \
neededtest neededtest2 neededtest3 neededtest4 \
@@ -332,6 +333,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
libmarkermod2-1 libmarkermod2-2 \
libmarkermod3-1 libmarkermod3-2 libmarkermod3-3 \
libmarkermod4-1 libmarkermod4-2 libmarkermod4-3 libmarkermod4-4 \
+ tst-tls20mod-bad
# Most modules build with _ISOMAC defined, but those filtered out
# depend on internal headers.
@@ -1828,3 +1830,9 @@ $(objpfx)tst-rtld-help.out: $(objpfx)ld.so
fi; \
(exit $$status); \
$(evaluate-test)
+
+# Reuses tst-tls-many-dynamic-modules
+tst-tls20mod-bad.so-no-z-defs = yes
+$(objpfx)tst-tls20: $(libdl) $(shared-thread-library)
+$(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \
+ $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so)
diff --git a/elf/tst-tls20.c b/elf/tst-tls20.c
new file mode 100644
index 0000000000000000..ac5f8c8d39b66dd6
--- /dev/null
+++ b/elf/tst-tls20.c
@@ -0,0 +1,98 @@
+/* Test dtv setup if entries don't have monotone increasing generation.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+
+#define NMOD 100
+static void *mod[NMOD];
+
+static void
+load_fail (void)
+{
+ /* Expected to fail because of a missing symbol. */
+ void *m = dlopen ("tst-tls20mod-bad.so", RTLD_NOW);
+ if (m != NULL)
+ FAIL_EXIT1 ("dlopen of tst-tls20mod-bad.so succeeded\n");
+}
+
+static void
+load_mod (int i)
+{
+ char *buf = xasprintf ("tst-tls-manydynamic%02dmod.so", i);
+ mod[i] = xdlopen (buf, RTLD_LAZY);
+ free (buf);
+}
+
+static void
+unload_mod (int i)
+{
+ if (mod[i] != NULL)
+ xdlclose (mod[i]);
+ mod[i] = NULL;
+}
+
+static void
+access (int i)
+{
+ char *buf = xasprintf ("tls_global_%02d", i);
+ dlerror ();
+ int *p = dlsym (mod[i], buf);
+ printf ("mod[%d]: &tls = %p\n", i, p);
+ if (p == NULL)
+ FAIL_EXIT1 ("dlsym failed: %s\n", dlerror ());
+ ++*p;
+ free (buf);
+}
+
+static void *
+start (void *a)
+{
+ for (int i = 0; i < NMOD; i++)
+ if (mod[i] != NULL)
+ access (i);
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ int i;
+
+ for (i = 0; i < NMOD; i++)
+ {
+ load_mod (i);
+ /* Bump the generation of mod[0] without using new dtv slot. */
+ unload_mod (0);
+ load_fail (); /* Ensure GL(dl_tls_dtv_gaps) is true: see bug 27135. */
+ load_mod (0);
+ /* Access TLS in all loaded modules. */
+ pthread_t t = xpthread_create (0, start, 0);
+ xpthread_join (t);
+ }
+ for (i = 0; i < NMOD; i++)
+ unload_mod (i);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/elf/tst-tls20mod-bad.c b/elf/tst-tls20mod-bad.c
new file mode 100644
index 0000000000000000..c1aed8ea7deffd22
--- /dev/null
+++ b/elf/tst-tls20mod-bad.c
@@ -0,0 +1,2 @@
+void missing_symbol (void);
+void f (void) {missing_symbol ();}
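The slotinfo situation the test above constructs can be reduced to a toy model (the numbers and array here are invented; glibc's real bookkeeping lives in the slotinfo list): every dlopen and dlclose bumps a global generation counter, so a freed slot that is reassigned later records a generation larger than those of slots assigned earlier.

#include <stdio.h>

#define NSLOTS 4

int
main (void)
{
  unsigned long generation = 0;
  unsigned long slot_gen[NSLOTS];

  for (int i = 0; i < NSLOTS; i++)   /* initial dlopens */
    slot_gen[i] = ++generation;

  ++generation;                      /* dlclose of slot 0 */
  slot_gen[0] = ++generation;        /* later dlopen reuses slot 0 */

  /* Prints 6 2 3 4: generations are no longer monotonically
     increasing along the list, which is what do_test arranges with
     unload_mod (0) / load_fail () / load_mod (0).  */
  for (int i = 0; i < NSLOTS; i++)
    printf ("slot %d: generation %lu\n", i, slot_gen[i]);
  return 0;
}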

View File

@ -0,0 +1,41 @@
commit c489c35054c39d7f2437ca61b369e3ede448f022
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Nov 30 11:44:25 2016 +0000
elf: Fix comments and logic in _dl_add_to_slotinfo
Since commit a509eb117fac1d764b15eba64993f4bdb63d7f3c ("Avoid late
dlopen failure due to scope, TLS slotinfo updates [BZ #25112]"), the
generation counter update is not needed in the failure path.
That commit ensures allocation in _dl_add_to_slotinfo happens before
the demarcation point in dlopen: the function is called twice, the
first time for allocation only, while dlopen can still be reverted on
failure; the second time the actual dtv updates are done, which can
no longer fail.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 0b96b1dceed99d58..9375650a3ab5247d 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -998,16 +998,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
if (listp == NULL)
{
- /* We ran out of memory. We will simply fail this
- call but don't undo anything we did so far. The
- application will crash or be terminated anyway very
- soon. */
-
- /* We have to do this since some entries in the dtv
- slotinfo array might already point to this
- generation. */
- ++GL(dl_tls_generation);
-
+ /* We ran out of memory while resizing the dtv slotinfo list. */
_dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
cannot create TLS data structures"));
}
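A sketch of the two-phase protocol described above, with a deliberately simplified, hypothetical API: phase one may fail while the whole operation can still be rolled back; phase two only writes into memory reserved in phase one, so the error branch that used to bump the generation counter has no failure left to cover.

#include <stdbool.h>
#include <stdlib.h>

struct entry { int ready; };

static struct entry *slots;

/* First call (do_add == false in glibc): allocation only.  A failure
   here happens before the demarcation point and is fully revertible.  */
static bool
phase1_reserve (size_t n)
{
  slots = calloc (n, sizeof *slots);
  return slots != NULL;
}

/* Second call: commit.  Touches only already-reserved memory and
   therefore cannot fail.  */
static void
phase2_commit (size_t n)
{
  for (size_t i = 0; i < n; i++)
    slots[i].ready = 1;
}

int
main (void)
{
  if (!phase1_reserve (8))
    return 1;          /* dlopen would report the error here */
  phase2_commit (8);
  free (slots);
  return 0;
}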

View File

@ -0,0 +1,58 @@
commit c0669ae1a629e16b536bf11cdd0865e0dbcf4bee
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Wed Dec 30 21:52:38 2020 +0000
elf: Refactor _dl_update_slotinfo to avoid use after free
map is not valid to access here because it can be freed by a
concurrent dlclose: during TLS access (via __tls_get_addr)
_dl_update_slotinfo is called without holding dlopen locks. So don't
check the modid of map.
The map == 0 and map != 0 code paths can be shared (avoiding the dtv
resize in case of map == 0 is just an optimization: larger dtv than
necessary would be fine too).
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 9375650a3ab5247d..15ed01d795a8627a 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -743,6 +743,8 @@ _dl_update_slotinfo (unsigned long int req_modid)
{
for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
{
+ size_t modid = total + cnt;
+
size_t gen = listp->slotinfo[cnt].gen;
if (gen > new_gen)
@@ -758,25 +760,12 @@ _dl_update_slotinfo (unsigned long int req_modid)
/* If there is no map this means the entry is empty. */
struct link_map *map = listp->slotinfo[cnt].map;
- if (map == NULL)
- {
- if (dtv[-1].counter >= total + cnt)
- {
- /* If this modid was used at some point the memory
- might still be allocated. */
- free (dtv[total + cnt].pointer.to_free);
- dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
- dtv[total + cnt].pointer.to_free = NULL;
- }
-
- continue;
- }
-
/* Check whether the current dtv array is large enough. */
- size_t modid = map->l_tls_modid;
- assert (total + cnt == modid);
if (dtv[-1].counter < modid)
{
+ if (map == NULL)
+ continue;
+
/* Resize the dtv. */
dtv = _dl_resize_dtv (dtv);
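The essence of the refactor, as a standalone sketch (the structures are stand-ins, not glibc's): the reader derives the module id from its position in the walk and only ever compares the map pointer against NULL, never dereferencing memory a concurrent dlclose may have freed.

#include <stddef.h>
#include <stdio.h>

struct slot { void *map; };      /* stand-in for struct dtv_slotinfo */

int
main (void)
{
  struct slot list[4] = { { NULL } };  /* all maps "freed" by dlclose */
  size_t total = 0;                    /* ids covered by earlier blocks */

  for (size_t cnt = 0; cnt < 4; ++cnt)
    {
      size_t modid = total + cnt;      /* safe: pure arithmetic */
      if (list[cnt].map == NULL)       /* pointer comparison is fine */
        printf ("modid %zu: empty slot\n", modid);
      /* Reading list[cnt].map->l_tls_modid here would be the
         use-after-free that the commit eliminates.  */
    }
  return 0;
}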

View File

@ -0,0 +1,48 @@
commit 8f7e09f4dbdb5c815a18b8285fbc5d5d7bc17d86
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Feb 11 11:29:23 2021 +0000
x86_64: Avoid lazy relocation of tlsdesc [BZ #27137]
Lazy tlsdesc relocation is racy because the static tls optimization and
tlsdesc management operations are done without holding the dlopen lock.
This is similar to commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
for aarch64, but it fixes a different race: bug 27137.
Another issue is that ld auditing ignores DT_BIND_NOW and thus tries to
relocate tlsdesc lazily, but that does not work in a BIND_NOW module
due to a missing DT_TLSDESC_PLT. Unconditionally relocating tlsdesc at
load time fixes bug 27721 too.
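A rough model of why eager relocation removes the race (simplified descriptor layout, not the real ABI): a descriptor written once at load time, before other threads can call through it, needs no read-side synchronization, whereas lazy resolution rewrites both fields while concurrent callers may already be jumping through entry.

#include <stdint.h>

struct tlsdesc
{
  intptr_t (*entry) (struct tlsdesc *);
  void *arg;
};

static intptr_t
return_offset (struct tlsdesc *td)
{
  return (intptr_t) td->arg;     /* precomputed TP offset */
}

/* Runs while the object is still invisible to other threads, so both
   fields can be stored without locks or ordering tricks.  */
static void
relocate_eagerly (struct tlsdesc *td, intptr_t tp_offset)
{
  td->arg = (void *) tp_offset;
  td->entry = return_offset;
}

int
main (void)
{
  struct tlsdesc td;
  relocate_eagerly (&td, 0x30);
  return td.entry (&td) == 0x30 ? 0 : 1;
}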
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index e308b662d245cc63..ef5740ba281c7282 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -563,12 +563,21 @@ elf_machine_lazy_rel (struct link_map *map,
}
else if (__glibc_likely (r_type == R_X86_64_TLSDESC))
{
- struct tlsdesc volatile * __attribute__((__unused__)) td =
- (struct tlsdesc volatile *)reloc_addr;
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
- td->arg = (void*)reloc;
- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
- + map->l_addr);
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum =
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
+
+ /* Always initialize TLS descriptors completely at load time, in
+ case static TLS is allocated for it that requires locking. */
+ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
}
else if (__glibc_unlikely (r_type == R_X86_64_IRELATIVE))
{

View File

@ -0,0 +1,116 @@
commit ddcacd91cc10ff92d6201eda87047d029c14158d
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Feb 11 11:40:11 2021 +0000
i386: Avoid lazy relocation of tlsdesc [BZ #27137]
Lazy tlsdesc relocation is racy because the static tls optimization and
tlsdesc management operations are done without holding the dlopen lock.
This is similar to commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
for aarch64, but it fixes a different race: bug 27137.
On i386 the code is a bit more complicated than on x86_64 because both
rel and rela relocs are supported.
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index e5776ef7bc8ad749..3a30671591284d79 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -679,50 +679,32 @@ elf_machine_lazy_rel (struct link_map *map,
}
else if (__glibc_likely (r_type == R_386_TLS_DESC))
{
- struct tlsdesc volatile * __attribute__((__unused__)) td =
- (struct tlsdesc volatile *)reloc_addr;
-
- /* Handle relocations that reference the local *ABS* in a simple
- way, so as to preserve a potential addend. */
- if (ELF32_R_SYM (reloc->r_info) == 0)
- td->entry = _dl_tlsdesc_resolve_abs_plus_addend;
- /* Given a known-zero addend, we can store a pointer to the
- reloc in the arg position. */
- else if (td->arg == 0)
- {
- td->arg = (void*)reloc;
- td->entry = _dl_tlsdesc_resolve_rel;
- }
- else
- {
- /* We could handle non-*ABS* relocations with non-zero addends
- by allocating dynamically an arg to hold a pointer to the
- reloc, but that sounds pointless. */
- const Elf32_Rel *const r = reloc;
- /* The code below was borrowed from elf_dynamic_do_rel(). */
- const ElfW(Sym) *const symtab =
- (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+ const Elf32_Rel *const r = reloc;
+ /* The code below was borrowed from elf_dynamic_do_rel(). */
+ const ElfW(Sym) *const symtab =
+ (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+ /* Always initialize TLS descriptors completely at load time, in
+ case static TLS is allocated for it that requires locking. */
# ifdef RTLD_BOOTSTRAP
- /* The dynamic linker always uses versioning. */
- assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL);
+ /* The dynamic linker always uses versioning. */
+ assert (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL);
# else
- if (map->l_info[VERSYMIDX (DT_VERSYM)])
+ if (map->l_info[VERSYMIDX (DT_VERSYM)])
# endif
- {
- const ElfW(Half) *const version =
- (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
- &map->l_versions[ndx],
- (void *) (l_addr + r->r_offset), skip_ifunc);
- }
+ {
+ const ElfW(Half) *const version =
+ (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
+ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
+ &map->l_versions[ndx],
+ (void *) (l_addr + r->r_offset), skip_ifunc);
+ }
# ifndef RTLD_BOOTSTRAP
- else
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
- (void *) (l_addr + r->r_offset), skip_ifunc);
+ else
+ elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
+ (void *) (l_addr + r->r_offset), skip_ifunc);
# endif
- }
}
else if (__glibc_unlikely (r_type == R_386_IRELATIVE))
{
@@ -749,11 +731,21 @@ elf_machine_lazy_rela (struct link_map *map,
;
else if (__glibc_likely (r_type == R_386_TLS_DESC))
{
- struct tlsdesc volatile * __attribute__((__unused__)) td =
- (struct tlsdesc volatile *)reloc_addr;
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+ const ElfW (Sym) *sym = &symtab[symndx];
+ const struct r_found_version *version = NULL;
+
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+ {
+ const ElfW (Half) *vernum =
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
+ }
- td->arg = (void*)reloc;
- td->entry = _dl_tlsdesc_resolve_rela;
+ /* Always initialize TLS descriptors completely at load time, in
+ case static TLS is allocated for it that requires locking. */
+ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
}
else if (__glibc_unlikely (r_type == R_386_IRELATIVE))
{

View File

@ -0,0 +1,277 @@
commit 55c9f3238080e9aba733bc0902779c46cfa16446
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Feb 11 11:52:24 2021 +0000
x86_64: Remove lazy tlsdesc relocation related code
_dl_tlsdesc_resolve_rela and _dl_tlsdesc_resolve_hold are only used for
lazy tlsdesc relocation processing, which is no longer supported.
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ef5740ba281c7282..b94d3b39ec1dca64 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -127,10 +127,6 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
}
}
- if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
- *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
- = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
-
return lazy;
}
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index 80d771cd887dd626..77e78cf0a6d8babc 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -148,107 +148,3 @@ _dl_tlsdesc_dynamic:
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
#endif /* SHARED */
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- RELA relocations. The incoming 0(%rsp) points to the caller's
- link map, pushed by the dynamic object's internal lazy TLS
- resolver front-end before tail-calling us. We need to pop it
- ourselves. %rax points to a TLS descriptor, such that 0(%rax)
- holds the address of the internal resolver front-end (unless
- some other thread beat us to resolving it) and 8(%rax) holds a
- pointer to the relocation.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_rela
- .global _dl_tlsdesc_resolve_rela
- .type _dl_tlsdesc_resolve_rela,@function
- cfi_startproc
- .align 16
- /* The PLT entry will have pushed the link_map pointer. */
-_dl_tlsdesc_resolve_rela:
- _CET_ENDBR
- cfi_adjust_cfa_offset (8)
- /* Save all call-clobbered registers. Add 8 bytes for push in
- the PLT entry to align the stack. */
- subq $80, %rsp
- cfi_adjust_cfa_offset (80)
- movq %rax, (%rsp)
- movq %rdi, 8(%rsp)
- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
- movq %rsi, 16(%rsp)
- movq 80(%rsp), %rsi /* Pass link_map* in %rsi. */
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
- movq %r10, 40(%rsp)
- movq %r11, 48(%rsp)
- movq %rdx, 56(%rsp)
- movq %rcx, 64(%rsp)
- call _dl_tlsdesc_resolve_rela_fixup
- movq (%rsp), %rax
- movq 8(%rsp), %rdi
- movq 16(%rsp), %rsi
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- movq 40(%rsp), %r10
- movq 48(%rsp), %r11
- movq 56(%rsp), %rdx
- movq 64(%rsp), %rcx
- addq $88, %rsp
- cfi_adjust_cfa_offset (-88)
- jmp *(%rax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
-
- /* This function is a placeholder for lazy resolving of TLS
- relocations. Once some thread starts resolving a TLS
- relocation, it sets up the TLS descriptor to use this
- resolver, such that other threads that would attempt to
- resolve it concurrently may skip the call to the original lazy
- resolver and go straight to a condition wait.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_hold
- .global _dl_tlsdesc_resolve_hold
- .type _dl_tlsdesc_resolve_hold,@function
- cfi_startproc
- .align 16
-_dl_tlsdesc_resolve_hold:
-0:
- _CET_ENDBR
- /* Save all call-clobbered registers. */
- subq $72, %rsp
- cfi_adjust_cfa_offset (72)
- movq %rax, (%rsp)
- movq %rdi, 8(%rsp)
- movq %rax, %rdi /* Pass tlsdesc* in %rdi. */
- movq %rsi, 16(%rsp)
- /* Pass _dl_tlsdesc_resolve_hold's address in %rsi. */
- leaq . - _dl_tlsdesc_resolve_hold(%rip), %rsi
- movq %r8, 24(%rsp)
- movq %r9, 32(%rsp)
- movq %r10, 40(%rsp)
- movq %r11, 48(%rsp)
- movq %rdx, 56(%rsp)
- movq %rcx, 64(%rsp)
- call _dl_tlsdesc_resolve_hold_fixup
-1:
- movq (%rsp), %rax
- movq 8(%rsp), %rdi
- movq 16(%rsp), %rsi
- movq 24(%rsp), %r8
- movq 32(%rsp), %r9
- movq 40(%rsp), %r10
- movq 48(%rsp), %r11
- movq 56(%rsp), %rdx
- movq 64(%rsp), %rcx
- addq $72, %rsp
- cfi_adjust_cfa_offset (-72)
- jmp *(%rax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h
index 66e659bb5c7ede74..1cde1ee9664f4908 100644
--- a/sysdeps/x86_64/dl-tlsdesc.h
+++ b/sysdeps/x86_64/dl-tlsdesc.h
@@ -55,9 +55,7 @@ struct tlsdesc_dynamic_arg
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return(struct tlsdesc *on_rax),
- _dl_tlsdesc_undefweak(struct tlsdesc *on_rax),
- _dl_tlsdesc_resolve_rela(struct tlsdesc *on_rax),
- _dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax);
+ _dl_tlsdesc_undefweak(struct tlsdesc *on_rax);
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c
index 302d097dbb0c4f1e..61a19ae26944c84f 100644
--- a/sysdeps/x86_64/tlsdesc.c
+++ b/sysdeps/x86_64/tlsdesc.c
@@ -16,120 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <link.h>
#include <ldsodefs.h>
-#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
-/* The following 2 functions take a caller argument, that contains the
- address expected to be in the TLS descriptor. If it's changed, we
- want to return immediately. */
-
-/* This function is used to lazily resolve TLS_DESC RELA relocations.
- The argument location is used to hold a pointer to the relocation. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
- struct link_map *l)
-{
- const ElfW(Rela) *reloc = td->arg;
-
- if (_dl_tlsdesc_resolve_early_return_p
- (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
- return;
-
- /* The code below was borrowed from _dl_fixup(). */
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
- lookup_t result;
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- }
-
- if (! sym)
- {
- td->arg = (void*)reloc->r_addend;
- td->entry = _dl_tlsdesc_undefweak;
- }
- else
- {
-# ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-# else
- if (!TRY_STATIC_TLS (l, result))
- {
- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
- + reloc->r_addend);
- td->entry = _dl_tlsdesc_dynamic;
- }
- else
-# endif
- {
- td->arg = (void*)(sym->st_value - result->l_tls_offset
- + reloc->r_addend);
- td->entry = _dl_tlsdesc_return;
- }
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
- complete the lazy relocation. Once another thread wins the race to
- relocate a TLS descriptor, it sets the descriptor up such that this
- function is called to wait until the resolver releases the
- lock. */
-
-void
-attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
- void *caller)
-{
- /* Maybe we're lucky and can return early. */
- if (caller != td->entry)
- return;
-
- /* Locking here will stop execution until the running resolver runs
- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
- FIXME: We'd be better off waiting on a condition variable, such
- that we didn't have to hold the lock throughout the relocation
- processing. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */

View File

@ -0,0 +1,443 @@
commit a75a02a696f9f869d77b17b99964823aa8833a8b
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Thu Feb 11 11:58:20 2021 +0000
i386: Remove lazy tlsdesc relocation related code
Like in commit e75711ebfa976d5468ec292282566a18b07e4d67 for x86_64,
remove unused lazy tlsdesc relocation processing code:
_dl_tlsdesc_resolve_abs_plus_addend
_dl_tlsdesc_resolve_rel
_dl_tlsdesc_resolve_rela
_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S
index 128f0af3188f46bb..22ecb2c6adc6cc6e 100644
--- a/sysdeps/i386/dl-tlsdesc.S
+++ b/sysdeps/i386/dl-tlsdesc.S
@@ -138,159 +138,3 @@ _dl_tlsdesc_dynamic:
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
#endif /* SHARED */
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- REL relocations that reference the *ABS* segment in their own
- link maps. %ebx points to the caller's GOT. %eax points to a
- TLS descriptor, such that 0(%eax) holds the address of the
- resolver wrapper itself (unless some other thread beat us to
- it) and 4(%eax) holds the addend in the relocation.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_abs_plus_addend
- .global _dl_tlsdesc_resolve_abs_plus_addend
- .type _dl_tlsdesc_resolve_abs_plus_addend,@function
- cfi_startproc
- .align 16
-_dl_tlsdesc_resolve_abs_plus_addend:
-0:
- _CET_ENDBR
- pushl %eax
- cfi_adjust_cfa_offset (4)
- pushl %ecx
- cfi_adjust_cfa_offset (4)
- pushl %edx
- cfi_adjust_cfa_offset (4)
- movl $1f - 0b, %ecx
- movl 4(%ebx), %edx
- call _dl_tlsdesc_resolve_abs_plus_addend_fixup
-1:
- popl %edx
- cfi_adjust_cfa_offset (-4)
- popl %ecx
- cfi_adjust_cfa_offset (-4)
- popl %eax
- cfi_adjust_cfa_offset (-4)
- jmp *(%eax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_abs_plus_addend, .-_dl_tlsdesc_resolve_abs_plus_addend
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- REL relocations that had zero addends. %ebx points to the
- caller's GOT. %eax points to a TLS descriptor, such that
- 0(%eax) holds the address of the resolver wrapper itself
- (unless some other thread beat us to it) and 4(%eax) holds a
- pointer to the relocation.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_rel
- .global _dl_tlsdesc_resolve_rel
- .type _dl_tlsdesc_resolve_rel,@function
- cfi_startproc
- .align 16
-_dl_tlsdesc_resolve_rel:
-0:
- _CET_ENDBR
- pushl %eax
- cfi_adjust_cfa_offset (4)
- pushl %ecx
- cfi_adjust_cfa_offset (4)
- pushl %edx
- cfi_adjust_cfa_offset (4)
- movl $1f - 0b, %ecx
- movl 4(%ebx), %edx
- call _dl_tlsdesc_resolve_rel_fixup
-1:
- popl %edx
- cfi_adjust_cfa_offset (-4)
- popl %ecx
- cfi_adjust_cfa_offset (-4)
- popl %eax
- cfi_adjust_cfa_offset (-4)
- jmp *(%eax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_rel, .-_dl_tlsdesc_resolve_rel
-
- /* This function is a wrapper for a lazy resolver for TLS_DESC
- RELA relocations. %ebx points to the caller's GOT. %eax
- points to a TLS descriptor, such that 0(%eax) holds the
- address of the resolver wrapper itself (unless some other
- thread beat us to it) and 4(%eax) holds a pointer to the
- relocation.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_rela
- .global _dl_tlsdesc_resolve_rela
- .type _dl_tlsdesc_resolve_rela,@function
- cfi_startproc
- .align 16
-_dl_tlsdesc_resolve_rela:
-0:
- _CET_ENDBR
- pushl %eax
- cfi_adjust_cfa_offset (4)
- pushl %ecx
- cfi_adjust_cfa_offset (4)
- pushl %edx
- cfi_adjust_cfa_offset (4)
- movl $1f - 0b, %ecx
- movl 4(%ebx), %edx
- call _dl_tlsdesc_resolve_rela_fixup
-1:
- popl %edx
- cfi_adjust_cfa_offset (-4)
- popl %ecx
- cfi_adjust_cfa_offset (-4)
- popl %eax
- cfi_adjust_cfa_offset (-4)
- jmp *(%eax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
-
- /* This function is a placeholder for lazy resolving of TLS
- relocations. Once some thread starts resolving a TLS
- relocation, it sets up the TLS descriptor to use this
- resolver, such that other threads that would attempt to
- resolve it concurrently may skip the call to the original lazy
- resolver and go straight to a condition wait.
-
- When the actual resolver returns, it will have adjusted the
- TLS descriptor such that we can tail-call it for it to return
- the TP offset of the symbol. */
-
- .hidden _dl_tlsdesc_resolve_hold
- .global _dl_tlsdesc_resolve_hold
- .type _dl_tlsdesc_resolve_hold,@function
- cfi_startproc
- .align 16
-_dl_tlsdesc_resolve_hold:
-0:
- _CET_ENDBR
- pushl %eax
- cfi_adjust_cfa_offset (4)
- pushl %ecx
- cfi_adjust_cfa_offset (4)
- pushl %edx
- cfi_adjust_cfa_offset (4)
- movl $1f - 0b, %ecx
- movl 4(%ebx), %edx
- call _dl_tlsdesc_resolve_hold_fixup
-1:
- popl %edx
- cfi_adjust_cfa_offset (-4)
- popl %ecx
- cfi_adjust_cfa_offset (-4)
- popl %eax
- cfi_adjust_cfa_offset (-4)
- jmp *(%eax)
- cfi_endproc
- .size _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/i386/dl-tlsdesc.h b/sysdeps/i386/dl-tlsdesc.h
index c8a1e056150dc418..1a1a22c303baf85b 100644
--- a/sysdeps/i386/dl-tlsdesc.h
+++ b/sysdeps/i386/dl-tlsdesc.h
@@ -43,11 +43,7 @@ struct tlsdesc_dynamic_arg
extern ptrdiff_t attribute_hidden __attribute__ ((regparm (1)))
_dl_tlsdesc_return (struct tlsdesc *),
- _dl_tlsdesc_undefweak (struct tlsdesc *),
- _dl_tlsdesc_resolve_abs_plus_addend (struct tlsdesc *),
- _dl_tlsdesc_resolve_rel (struct tlsdesc *),
- _dl_tlsdesc_resolve_rela (struct tlsdesc *),
- _dl_tlsdesc_resolve_hold (struct tlsdesc *);
+ _dl_tlsdesc_undefweak (struct tlsdesc *);
# ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
diff --git a/sysdeps/i386/tlsdesc.c b/sysdeps/i386/tlsdesc.c
index 82fa8a1d35fd1912..1b4227c8381e1b3d 100644
--- a/sysdeps/i386/tlsdesc.c
+++ b/sysdeps/i386/tlsdesc.c
@@ -16,242 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <link.h>
#include <ldsodefs.h>
-#include <elf/dynamic-link.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
+#define _dl_tlsdesc_resolve_hold 0
#include <tlsdeschtab.h>
-/* The following 4 functions take an entry_check_offset argument.
- It's computed by the caller as an offset between its entry point
- and the call site, such that by adding the built-in return address
- that is implicitly passed to the function with this offset, we can
- easily obtain the caller's entry point to compare with the entry
- point given in the TLS descriptor. If it's changed, we want to
- return immediately. */
-
-/* This function is used to lazily resolve TLS_DESC REL relocations
- that reference the *ABS* segment in their own link maps. The
- argument is the addend originally stored there. */
-
-void
-__attribute__ ((regparm (3))) attribute_hidden
-_dl_tlsdesc_resolve_abs_plus_addend_fixup (struct tlsdesc volatile *td,
- struct link_map *l,
- ptrdiff_t entry_check_offset)
-{
- ptrdiff_t addend = (ptrdiff_t) td->arg;
-
- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
- - entry_check_offset))
- return;
-
-#ifndef SHARED
- CHECK_STATIC_TLS (l, l);
-#else
- if (!TRY_STATIC_TLS (l, l))
- {
- td->arg = _dl_make_tlsdesc_dynamic (l, addend);
- td->entry = _dl_tlsdesc_dynamic;
- }
- else
-#endif
- {
- td->arg = (void*) (addend - l->l_tls_offset);
- td->entry = _dl_tlsdesc_return;
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to lazily resolve TLS_DESC REL relocations
- that originally had zero addends. The argument location, that
- originally held the addend, is used to hold a pointer to the
- relocation, but it has to be restored before we call the function
- that applies relocations. */
-
-void
-__attribute__ ((regparm (3))) attribute_hidden
-_dl_tlsdesc_resolve_rel_fixup (struct tlsdesc volatile *td,
- struct link_map *l,
- ptrdiff_t entry_check_offset)
-{
- const ElfW(Rel) *reloc = td->arg;
-
- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
- - entry_check_offset))
- return;
-
- /* The code below was borrowed from _dl_fixup(),
- except for checking for STB_LOCAL. */
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
- lookup_t result;
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- }
-
- if (!sym)
- {
- td->arg = 0;
- td->entry = _dl_tlsdesc_undefweak;
- }
- else
- {
-# ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-# else
- if (!TRY_STATIC_TLS (l, result))
- {
- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value);
- td->entry = _dl_tlsdesc_dynamic;
- }
- else
-# endif
- {
- td->arg = (void*)(sym->st_value - result->l_tls_offset);
- td->entry = _dl_tlsdesc_return;
- }
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to lazily resolve TLS_DESC RELA relocations.
- The argument location is used to hold a pointer to the relocation. */
-
-void
-__attribute__ ((regparm (3))) attribute_hidden
-_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc volatile *td,
- struct link_map *l,
- ptrdiff_t entry_check_offset)
-{
- const ElfW(Rela) *reloc = td->arg;
-
- if (_dl_tlsdesc_resolve_early_return_p (td, __builtin_return_address (0)
- - entry_check_offset))
- return;
-
- /* The code below was borrowed from _dl_fixup(),
- except for checking for STB_LOCAL. */
- const ElfW(Sym) *const symtab
- = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
- const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
- lookup_t result;
-
- /* Look up the target symbol. If the normal lookup rules are not
- used don't look in the global scope. */
- if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
- && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
- {
- const struct r_found_version *version = NULL;
-
- if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
- {
- const ElfW(Half) *vernum =
- (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
- ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
- version = &l->l_versions[ndx];
- if (version->hash == 0)
- version = NULL;
- }
-
- result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
- l->l_scope, version, ELF_RTYPE_CLASS_PLT,
- DL_LOOKUP_ADD_DEPENDENCY, NULL);
- }
- else
- {
- /* We already found the symbol. The module (and therefore its load
- address) is also known. */
- result = l;
- }
-
- if (!sym)
- {
- td->arg = (void*) reloc->r_addend;
- td->entry = _dl_tlsdesc_undefweak;
- }
- else
- {
-# ifndef SHARED
- CHECK_STATIC_TLS (l, result);
-# else
- if (!TRY_STATIC_TLS (l, result))
- {
- td->arg = _dl_make_tlsdesc_dynamic (result, sym->st_value
- + reloc->r_addend);
- td->entry = _dl_tlsdesc_dynamic;
- }
- else
-# endif
- {
- td->arg = (void*) (sym->st_value - result->l_tls_offset
- + reloc->r_addend);
- td->entry = _dl_tlsdesc_return;
- }
- }
-
- _dl_tlsdesc_wake_up_held_fixups ();
-}
-
-/* This function is used to avoid busy waiting for other threads to
- complete the lazy relocation. Once another thread wins the race to
- relocate a TLS descriptor, it sets the descriptor up such that this
- function is called to wait until the resolver releases the
- lock. */
-
-void
-__attribute__ ((regparm (3))) attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
- struct link_map *l __attribute__((__unused__)),
- ptrdiff_t entry_check_offset)
-{
- /* Maybe we're lucky and can return early. */
- if (__builtin_return_address (0) - entry_check_offset != td->entry)
- return;
-
- /* Locking here will stop execution until the running resolver runs
- _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
-
- FIXME: We'd be better off waiting on a condition variable, such
- that we didn't have to hold the lock throughout the relocation
- processing. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-}
-
-
/* Unmap the dynamic object, but also release its TLS descriptor table
if there is one. */

View File

@ -0,0 +1,304 @@
commit 86f65dffc2396d408beb628f1cad2b8f63e197bd
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sun Jul 12 06:04:53 2020 -0700
ld.so: Add --list-tunables to print tunable values
Pass --list-tunables to ld.so to print tunables with min and max values.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/Makefile
(different backporting order)
diff --git a/elf/Makefile b/elf/Makefile
index 3e71939d3234c4c3..aa65ec59f143bccf 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -44,6 +44,10 @@ dl-routines += dl-tunables
tunables-type = $(addprefix TUNABLES_FRONTEND_,$(have-tunables))
CPPFLAGS-dl-tunables.c += -DTUNABLES_FRONTEND=$(tunables-type)
+ifeq (yesyes,$(build-shared)$(run-built-tests))
+tests-special += $(objpfx)list-tunables.out
+endif
+
# Make sure that the compiler does not insert any library calls in tunables
# code paths.
ifeq (yes,$(have-loop-to-function))
@@ -1825,6 +1829,13 @@ $(objpfx)tst-glibc-hwcaps-mask.out: \
# tst-glibc-hwcaps-cache.
$(objpfx)tst-glibc-hwcaps-cache.out: $(objpfx)tst-glibc-hwcaps
+$(objpfx)list-tunables.out: tst-rtld-list-tunables.sh $(objpfx)ld.so
+ $(SHELL) $< $(objpfx)ld.so '$(test-wrapper-env)' \
+ '$(run_program_env)' > $(objpfx)/tst-rtld-list-tunables.out
+ cmp tst-rtld-list-tunables.exp \
+ $(objpfx)/tst-rtld-list-tunables.out > $@; \
+ $(evaluate-test)
+
tst-dst-static-ENV = LD_LIBRARY_PATH='$$ORIGIN'
$(objpfx)tst-rtld-help.out: $(objpfx)ld.so
diff --git a/elf/dl-main.h b/elf/dl-main.h
index 566713a0d10cfdb7..9e7b51d8f010e904 100644
--- a/elf/dl-main.h
+++ b/elf/dl-main.h
@@ -63,7 +63,7 @@ struct audit_list
enum rtld_mode
{
rtld_mode_normal, rtld_mode_list, rtld_mode_verify, rtld_mode_trace,
- rtld_mode_help,
+ rtld_mode_list_tunables, rtld_mode_help,
};
/* Aggregated state information extracted from environment variables
diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
index bbc3679e3564a766..3c84809d44381241 100644
--- a/elf/dl-tunables.c
+++ b/elf/dl-tunables.c
@@ -26,6 +26,7 @@
#include <sysdep.h>
#include <fcntl.h>
#include <ldsodefs.h>
+#include <array_length.h>
#define TUNABLES_INTERNAL 1
#include "dl-tunables.h"
@@ -359,6 +360,48 @@ __tunables_init (char **envp)
}
}
+void
+__tunables_print (void)
+{
+ for (int i = 0; i < array_length (tunable_list); i++)
+ {
+ const tunable_t *cur = &tunable_list[i];
+ if (cur->type.type_code == TUNABLE_TYPE_STRING
+ && cur->val.strval == NULL)
+ _dl_printf ("%s:\n", cur->name);
+ else
+ {
+ _dl_printf ("%s: ", cur->name);
+ switch (cur->type.type_code)
+ {
+ case TUNABLE_TYPE_INT_32:
+ _dl_printf ("%d (min: %d, max: %d)\n",
+ (int) cur->val.numval,
+ (int) cur->type.min,
+ (int) cur->type.max);
+ break;
+ case TUNABLE_TYPE_UINT_64:
+ _dl_printf ("0x%lx (min: 0x%lx, max: 0x%lx)\n",
+ (long int) cur->val.numval,
+ (long int) cur->type.min,
+ (long int) cur->type.max);
+ break;
+ case TUNABLE_TYPE_SIZE_T:
+ _dl_printf ("0x%Zx (min: 0x%Zx, max: 0x%Zx)\n",
+ (size_t) cur->val.numval,
+ (size_t) cur->type.min,
+ (size_t) cur->type.max);
+ break;
+ case TUNABLE_TYPE_STRING:
+ _dl_printf ("%s\n", cur->val.strval);
+ break;
+ default:
+ __builtin_unreachable ();
+ }
+ }
+ }
+}
+
/* Set the tunable value. This is called by the module that the tunable exists
in. */
void
diff --git a/elf/dl-tunables.h b/elf/dl-tunables.h
index 7f181f3316cd9fc1..f4f2cfaeb9828599 100644
--- a/elf/dl-tunables.h
+++ b/elf/dl-tunables.h
@@ -69,9 +69,11 @@ typedef struct _tunable tunable_t;
# include "dl-tunable-list.h"
extern void __tunables_init (char **);
+extern void __tunables_print (void);
extern void __tunable_get_val (tunable_id_t, void *, tunable_callback_t);
extern void __tunable_set_val (tunable_id_t, void *);
rtld_hidden_proto (__tunables_init)
+rtld_hidden_proto (__tunables_print)
rtld_hidden_proto (__tunable_get_val)
/* Define TUNABLE_GET and TUNABLE_SET in short form if TOP_NAMESPACE and
diff --git a/elf/dl-usage.c b/elf/dl-usage.c
index e22a9c39427187d1..908b4894b3014b2d 100644
--- a/elf/dl-usage.c
+++ b/elf/dl-usage.c
@@ -255,7 +255,12 @@ setting environment variables (which would be inherited by subprocesses).\n\
in LIST\n\
--audit LIST use objects named in LIST as auditors\n\
--preload LIST preload objects named in LIST\n\
- --argv0 STRING set argv[0] to STRING before running\n\
+ --argv0 STRING set argv[0] to STRING before running\n"
+#if HAVE_TUNABLES
+"\
+ --list-tunables list all tunables with minimum and maximum values\n"
+#endif
+"\
--help display this help and exit\n\
--version output version information and exit\n\
\n\
diff --git a/elf/rtld.c b/elf/rtld.c
index 9e09896da078274d..54b621ec5ca014fa 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -47,6 +47,7 @@
#include <libc-early-init.h>
#include <dl-main.h>
#include <gnu/lib-names.h>
+#include <dl-tunables.h>
#include <assert.h>
@@ -1262,6 +1263,16 @@ dl_main (const ElfW(Phdr) *phdr,
_dl_argc -= 2;
_dl_argv += 2;
}
+#if HAVE_TUNABLES
+ else if (! strcmp (_dl_argv[1], "--list-tunables"))
+ {
+ state.mode = rtld_mode_list_tunables;
+
+ ++_dl_skip_args;
+ --_dl_argc;
+ ++_dl_argv;
+ }
+#endif
else if (strcmp (_dl_argv[1], "--help") == 0)
{
state.mode = rtld_mode_help;
@@ -1282,6 +1293,14 @@ dl_main (const ElfW(Phdr) *phdr,
else
break;
+#if HAVE_TUNABLES
+ if (__glibc_unlikely (state.mode == rtld_mode_list_tunables))
+ {
+ __tunables_print ();
+ _exit (0);
+ }
+#endif
+
/* If we have no further argument the program was called incorrectly.
Grant the user some education. */
if (_dl_argc < 2)
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
new file mode 100644
index 0000000000000000..4f3f7ee4e30a2b42
--- /dev/null
+++ b/elf/tst-rtld-list-tunables.exp
@@ -0,0 +1,14 @@
+glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
+glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
+glibc.malloc.check: 0 (min: 0, max: 3)
+glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647)
+glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.perturb: 0 (min: 0, max: 255)
+glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
+glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+)
diff --git a/elf/tst-rtld-list-tunables.sh b/elf/tst-rtld-list-tunables.sh
new file mode 100755
index 0000000000000000..e7bbdde94952b872
--- /dev/null
+++ b/elf/tst-rtld-list-tunables.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Test the --list-tunables option of ld.so.
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+rtld=$1
+test_wrapper_env=$2
+run_program_env=$3
+
+LC_ALL=C
+export LC_ALL
+
+${test_wrapper_env} \
+${run_program_env} \
+$rtld --list-tunables \
+| sort -u \
+| egrep "(rtld|malloc)" \
+| sed -e "s/0xf\+/0x[f]+/"
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 07887981748bc44b..43272cf885d1e3e6 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -28,6 +28,44 @@ Finally, the set of tunables available may vary between distributions as
the tunables feature allows distributions to add their own tunables under
their own namespace.
Pass @option{--list-tunables} to the dynamic loader to print all
tunables with their minimum and maximum values:
+
+@example
+$ /lib64/ld-linux-x86-64.so.2 --list-tunables
+glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
+glibc.elision.skip_lock_after_retries: 3 (min: -2147483648, max: 2147483647)
+glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.perturb: 0 (min: 0, max: 255)
+glibc.cpu.x86_shared_cache_size: 0x100000 (min: 0x0, max: 0xffffffffffffffff)
+glibc.mem.tagging: 0 (min: 0, max: 255)
+glibc.elision.tries: 3 (min: -2147483648, max: 2147483647)
+glibc.elision.enable: 0 (min: 0, max: 1)
+glibc.cpu.x86_rep_movsb_threshold: 0x1000 (min: 0x100, max: 0xffffffffffffffff)
+glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.elision.skip_lock_busy: 3 (min: -2147483648, max: 2147483647)
+glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.cpu.x86_rep_stosb_threshold: 0x800 (min: 0x1, max: 0xffffffffffffffff)
+glibc.cpu.x86_non_temporal_threshold: 0xc0000 (min: 0x0, max: 0xffffffffffffffff)
+glibc.cpu.x86_shstk:
+glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.mmap_max: 0 (min: -2147483648, max: 2147483647)
+glibc.elision.skip_trylock_internal_abort: 3 (min: -2147483648, max: 2147483647)
+glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.cpu.x86_ibt:
+glibc.cpu.hwcaps:
+glibc.elision.skip_lock_internal_abort: 3 (min: -2147483648, max: 2147483647)
+glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0xffffffffffffffff)
+glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.cpu.x86_data_cache_size: 0x8000 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0xffffffffffffffff)
+glibc.pthread.mutex_spin_count: 100 (min: 0, max: 32767)
+glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0xffffffffffffffff)
+glibc.malloc.check: 0 (min: 0, max: 3)
+@end example
+
@menu
* Tunable names:: The structure of a tunable name
* Memory Allocation Tunables:: Tunables in the memory allocation subsystem

View File

@ -0,0 +1,30 @@
commit d2d12c7a988a9a04aec23b5e4af549db61b0a005
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Tue Feb 2 09:31:56 2021 -0800
tst-rtld-list-tunables.sh: Unset glibc tunables
Unset glibc tunables and their aliases for --list-tunables test.
diff --git a/elf/tst-rtld-list-tunables.sh b/elf/tst-rtld-list-tunables.sh
index e7bbdde94952b872..78f4ed2ebbd3db2c 100755
--- a/elf/tst-rtld-list-tunables.sh
+++ b/elf/tst-rtld-list-tunables.sh
@@ -26,6 +26,17 @@ run_program_env=$3
LC_ALL=C
export LC_ALL
+# Unset tunables and their aliases.
+GLIBC_TUNABLES=
+MALLOC_ARENA_MAX=
+MALLOC_ARENA_TEST=
+MALLOC_CHECK_=
+MALLOC_MMAP_MAX_=
+MALLOC_MMAP_THRESHOLD_=
+MALLOC_PERTURB_=
+MALLOC_TOP_PAD_=
+MALLOC_TRIM_THRESHOLD_=
+
${test_wrapper_env} \
${run_program_env} \
$rtld --list-tunables \

View File

@ -0,0 +1,578 @@
commit 851f32cf7bf7067f73b991610778915edd57d7b4
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Mar 2 14:38:42 2021 +0100
ld.so: Implement the --list-diagnostics option
diff --git a/elf/Makefile b/elf/Makefile
index aa65ec59f143bccf..d246f1c0d9e019fd 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -64,7 +64,7 @@ elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
# interpreter and operating independent of libc.
rtld-routines = rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
- dl-usage
+ dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu
all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
@@ -672,6 +672,9 @@ CFLAGS-cache.c += $(SYSCONF-FLAGS)
CFLAGS-rtld.c += $(SYSCONF-FLAGS)
CFLAGS-dl-usage.c += $(SYSCONF-FLAGS) \
-D'RTLD="$(rtlddir)/$(rtld-installed-name)"'
+CFLAGS-dl-diagnostics.c += $(SYSCONF-FLAGS) \
+ -D'PREFIX="$(prefix)"' \
+ -D'RTLD="$(rtlddir)/$(rtld-installed-name)"'
cpp-srcs-left := $(all-rtld-routines:=.os)
lib := rtld
diff --git a/elf/dl-diagnostics-cpu.c b/elf/dl-diagnostics-cpu.c
new file mode 100644
index 0000000000000000..f7d149764bcb35a1
--- /dev/null
+++ b/elf/dl-diagnostics-cpu.c
@@ -0,0 +1,24 @@
+/* Print CPU diagnostics data in ld.so. Stub version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dl-diagnostics.h>
+
+void
+_dl_diagnostics_cpu (void)
+{
+}
diff --git a/elf/dl-diagnostics-kernel.c b/elf/dl-diagnostics-kernel.c
new file mode 100644
index 0000000000000000..831c358f1463cbf4
--- /dev/null
+++ b/elf/dl-diagnostics-kernel.c
@@ -0,0 +1,24 @@
+/* Print kernel diagnostics data in ld.so. Stub version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dl-diagnostics.h>
+
+void
+_dl_diagnostics_kernel (void)
+{
+}
diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c
new file mode 100644
index 0000000000000000..bef224b36cbf5fc3
--- /dev/null
+++ b/elf/dl-diagnostics.c
@@ -0,0 +1,265 @@
+/* Print diagnostics data in ld.so.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gnu/lib-names.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#include <dl-diagnostics.h>
+#include <dl-hwcaps.h>
+#include <dl-main.h>
+#include <dl-procinfo.h>
+#include <dl-sysdep.h>
+#include <ldsodefs.h>
+#include "trusted-dirs.h"
+#include "version.h"
+
+/* Write CH to standard output. */
+static void
+_dl_putc (char ch)
+{
+ _dl_write (STDOUT_FILENO, &ch, 1);
+}
+
+/* Print CH to standard output, quoting it if necessary. */
+static void
+print_quoted_char (char ch)
+{
+ if (ch < ' ' || ch > '~')
+ {
+ char buf[4];
+ buf[0] = '\\';
+ buf[1] = '0' + ((ch >> 6) & 7);
+ buf[2] = '0' + ((ch >> 3) & 7);
+ buf[3] = '0' + (ch & 7);
+ _dl_write (STDOUT_FILENO, buf, 4);
+ }
+ else
+ {
+ if (ch == '\\' || ch == '"')
+ _dl_putc ('\\');
+ _dl_putc (ch);
+ }
+}
+
+/* Print S of LEN bytes to standard output, quoting characters as
+ needed. */
+static void
+print_string_length (const char *s, size_t len)
+{
+ _dl_putc ('"');
+ for (size_t i = 0; i < len; ++i)
+ print_quoted_char (s[i]);
+ _dl_putc ('"');
+}
+
+void
+_dl_diagnostics_print_string (const char *s)
+{
+ if (s == NULL)
+ {
+ _dl_printf ("0x0");
+ return;
+ }
+
+ _dl_putc ('"');
+ while (*s != '\0')
+ {
+ print_quoted_char (*s);
+ ++s;
+ }
+ _dl_putc ('"');
+}
+
+void
+_dl_diagnostics_print_labeled_string (const char *label, const char *s)
+{
+ _dl_printf ("%s=", label);
+ _dl_diagnostics_print_string (s);
+ _dl_putc ('\n');
+}
+
+void
+_dl_diagnostics_print_labeled_value (const char *label, uint64_t value)
+{
+ if (sizeof (value) == sizeof (unsigned long int))
+ /* _dl_printf can print 64-bit values directly. */
+ _dl_printf ("%s=0x%lx\n", label, (unsigned long int) value);
+ else
+ {
+ uint32_t high = value >> 32;
+ uint32_t low = value;
+ if (high == 0)
+ _dl_printf ("%s=0x%x\n", label, low);
+ else
+ _dl_printf ("%s=0x%x%08x\n", label, high, low);
+ }
+}
+
+/* Return true if ENV is an unfiltered environment variable. */
+static bool
+unfiltered_envvar (const char *env, size_t *name_length)
+{
+ char *env_equal = strchr (env, '=');
+ if (env_equal == NULL)
+ {
+ /* Always dump malformed entries. */
+ *name_length = strlen (env);
+ return true;
+ }
+ size_t envname_length = env_equal - env;
+ *name_length = envname_length;
+
+ /* LC_ and LD_ variables. */
+ if (env[0] == 'L' && (env[1] == 'C' || env[1] == 'D')
+ && env[2] == '_')
+ return true;
+
+ /* MALLOC_ variables. */
+ if (strncmp (env, "MALLOC_", strlen ("MALLOC_")) == 0)
+ return true;
+
+ static const char unfiltered[] =
+ "DATEMSK\0"
+ "GCONV_PATH\0"
+ "GETCONF_DIR\0"
+ "GETCONF_DIR\0"
+ "GLIBC_TUNABLES\0"
+ "GMON_OUTPUT_PREFIX\0"
+ "HESIOD_CONFIG\0"
+ "HES_DOMAIN\0"
+ "HOSTALIASES\0"
+ "I18NPATH\0"
+ "IFS\0"
+ "LANG\0"
+ "LOCALDOMAIN\0"
+ "LOCPATH\0"
+ "MSGVERB\0"
+ "NIS_DEFAULTS\0"
+ "NIS_GROUP\0"
+ "NIS_PATH\0"
+ "NLSPATH\0"
+ "PATH\0"
+ "POSIXLY_CORRECT\0"
+ "RESOLV_HOST_CONF\0"
+ "RES_OPTIONS\0"
+ "SEV_LEVEL\0"
+ "TMPDIR\0"
+ "TZ\0"
+ "TZDIR\0"
+ /* Two null bytes at the end to mark the end of the list via an
+ empty substring. */
+ ;
+ for (const char *candidate = unfiltered; *candidate != '\0'; )
+ {
+ size_t candidate_length = strlen (candidate);
+ if (candidate_length == envname_length
+ && memcmp (candidate, env, candidate_length) == 0)
+ return true;
+ candidate += candidate_length + 1;
+ }
+
+ return false;
+}
+
+/* Dump the process environment. */
+static void
+print_environ (char **environ)
+{
+ unsigned int index = 0;
+ for (char **envp = environ; *envp != NULL; ++envp)
+ {
+ char *env = *envp;
+ size_t name_length;
+ bool unfiltered = unfiltered_envvar (env, &name_length);
+ _dl_printf ("env%s[0x%x]=",
+ unfiltered ? "" : "_filtered", index);
+ if (unfiltered)
+ _dl_diagnostics_print_string (env);
+ else
+ print_string_length (env, name_length);
+ _dl_putc ('\n');
+ ++index;
+ }
+}
+
+/* Print configured paths and the built-in search path. */
+static void
+print_paths (void)
+{
+ _dl_diagnostics_print_labeled_string ("path.prefix", PREFIX);
+ _dl_diagnostics_print_labeled_string ("path.rtld", RTLD);
+ _dl_diagnostics_print_labeled_string ("path.sysconfdir", SYSCONFDIR);
+
+ unsigned int index = 0;
+ static const char *system_dirs = SYSTEM_DIRS "\0";
+ for (const char *e = system_dirs; *e != '\0'; )
+ {
+ size_t len = strlen (e);
+ _dl_printf ("path.system_dirs[0x%x]=", index);
+ print_string_length (e, len);
+ _dl_putc ('\n');
+ ++index;
+ e += len + 1;
+ }
+}
+
+/* Print information about the glibc version. */
+static void
+print_version (void)
+{
+ _dl_diagnostics_print_labeled_string ("version.release", RELEASE);
+ _dl_diagnostics_print_labeled_string ("version.version", VERSION);
+}
+
+void
+_dl_print_diagnostics (char **environ)
+{
+#ifdef HAVE_DL_DISCOVER_OSVERSION
+ _dl_diagnostics_print_labeled_value
+ ("dl_discover_osversion", _dl_discover_osversion ());
+#endif
+ _dl_diagnostics_print_labeled_string ("dl_dst_lib", DL_DST_LIB);
+ _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap));
+ _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT);
+ _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2));
+ _dl_diagnostics_print_labeled_string
+ ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs);
+ _dl_diagnostics_print_labeled_value
+ ("dl_hwcaps_subdirs_active", _dl_hwcaps_subdirs_active ());
+ _dl_diagnostics_print_labeled_value ("dl_osversion", GLRO (dl_osversion));
+ _dl_diagnostics_print_labeled_value ("dl_pagesize", GLRO (dl_pagesize));
+ _dl_diagnostics_print_labeled_string ("dl_platform", GLRO (dl_platform));
+ _dl_diagnostics_print_labeled_string
+ ("dl_profile_output", GLRO (dl_profile_output));
+ _dl_diagnostics_print_labeled_value
+ ("dl_string_platform", _dl_string_platform ( GLRO (dl_platform)));
+
+ _dl_diagnostics_print_labeled_string ("dso.ld", LD_SO);
+ _dl_diagnostics_print_labeled_string ("dso.libc", LIBC_SO);
+
+ print_environ (environ);
+ print_paths ();
+ print_version ();
+
+ _dl_diagnostics_kernel ();
+ _dl_diagnostics_cpu ();
+
+ _exit (EXIT_SUCCESS);
+}
diff --git a/elf/dl-diagnostics.h b/elf/dl-diagnostics.h
new file mode 100644
index 0000000000000000..27dcb12bca12e5b6
--- /dev/null
+++ b/elf/dl-diagnostics.h
@@ -0,0 +1,46 @@
+/* Interfaces for printing diagnostics in ld.so.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_DIAGNOSTICS_H
+#define _DL_DIAGNOSTICS_H
+
+#include <stdint.h>
+
+/* Write the null-terminated string to standard output, surrounded in
+ quotation marks. */
+void _dl_diagnostics_print_string (const char *s) attribute_hidden;
+
+/* Like _dl_diagnostics_print_string, but add a LABEL= prefix, and a
+ newline character as a suffix. */
+void _dl_diagnostics_print_labeled_string (const char *label, const char *s)
+ attribute_hidden;
+
+/* Print LABEL=VALUE to standard output, followed by a newline
+ character. */
+void _dl_diagnostics_print_labeled_value (const char *label, uint64_t value)
+ attribute_hidden;
+
+/* Print diagnostics data for the kernel. Called from
+ _dl_print_diagnostics. */
+void _dl_diagnostics_kernel (void) attribute_hidden;
+
+/* Print diagnostics data for the CPU(s). Called from
+ _dl_print_diagnostics. */
+void _dl_diagnostics_cpu (void) attribute_hidden;
+
+#endif /* _DL_DIAGNOSTICS_H */
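
As context for the declarations above: _dl_diagnostics_print_labeled_value cannot rely on _dl_printf handling 64-bit values on 32-bit targets, so it prints the value in two 32-bit halves. A plain-printf sketch of that formatting logic, for illustration only:

#include <inttypes.h>
#include <stdio.h>

/* Emit LABEL=0xVALUE, printing a 64-bit value as two 32-bit halves
   so that no 64-bit printf support is required.  Mirrors the logic
   used by the diagnostics dump; illustration only, not ld.so code.  */
static void
print_labeled_value (const char *label, uint64_t value)
{
  uint32_t high = value >> 32;
  uint32_t low = (uint32_t) value;
  if (high == 0)
    printf ("%s=0x%" PRIx32 "\n", label, low);
  else
    printf ("%s=0x%" PRIx32 "%08" PRIx32 "\n", label, high, low);
}

int
main (void)
{
  print_labeled_value ("dl_hwcap", 0x10ff);          /* dl_hwcap=0x10ff */
  print_labeled_value ("big", 0x123456789abcULL);    /* big=0x123456789abc */
  return 0;
}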
diff --git a/elf/dl-main.h b/elf/dl-main.h
index 9e7b51d8f010e904..9fbbdb0fac09adf3 100644
--- a/elf/dl-main.h
+++ b/elf/dl-main.h
@@ -63,7 +63,7 @@ struct audit_list
enum rtld_mode
{
rtld_mode_normal, rtld_mode_list, rtld_mode_verify, rtld_mode_trace,
- rtld_mode_list_tunables, rtld_mode_help,
+ rtld_mode_list_tunables, rtld_mode_list_diagnostics, rtld_mode_help,
};
/* Aggregated state information extracted from environment variables
@@ -121,4 +121,7 @@ _Noreturn void _dl_version (void) attribute_hidden;
_Noreturn void _dl_help (const char *argv0, struct dl_main_state *state)
attribute_hidden;
+/* Print a diagnostics dump. */
+_Noreturn void _dl_print_diagnostics (char **environ) attribute_hidden;
+
#endif /* _DL_MAIN */
diff --git a/elf/dl-usage.c b/elf/dl-usage.c
index 908b4894b3014b2d..e19e1791d9169da2 100644
--- a/elf/dl-usage.c
+++ b/elf/dl-usage.c
@@ -261,6 +261,7 @@ setting environment variables (which would be inherited by subprocesses).\n\
--list-tunables list all tunables with minimum and maximum values\n"
#endif
"\
+ --list-diagnostics list diagnostics information\n\
--help display this help and exit\n\
--version output version information and exit\n\
\n\
diff --git a/elf/rtld.c b/elf/rtld.c
index 54b621ec5ca014fa..d14c388f548d6d51 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -138,6 +138,7 @@ static void dl_main_state_init (struct dl_main_state *state);
/* Process all environments variables the dynamic linker must recognize.
Since all of them start with `LD_' we are a bit smarter while finding
all the entries. */
+extern char **_environ attribute_hidden;
static void process_envvars (struct dl_main_state *state);
#ifdef DL_ARGV_NOT_RELRO
@@ -1273,6 +1274,14 @@ dl_main (const ElfW(Phdr) *phdr,
++_dl_argv;
}
#endif
+ else if (! strcmp (_dl_argv[1], "--list-diagnostics"))
+ {
+ state.mode = rtld_mode_list_diagnostics;
+
+ ++_dl_skip_args;
+ --_dl_argc;
+ ++_dl_argv;
+ }
else if (strcmp (_dl_argv[1], "--help") == 0)
{
state.mode = rtld_mode_help;
@@ -1301,6 +1310,9 @@ dl_main (const ElfW(Phdr) *phdr,
}
#endif
+ if (state.mode == rtld_mode_list_diagnostics)
+ _dl_print_diagnostics (_environ);
+
/* If we have no further argument the program was called incorrectly.
Grant the user some education. */
if (_dl_argc < 2)
@@ -2623,12 +2635,6 @@ a filename can be specified using the LD_DEBUG_OUTPUT environment variable.\n");
}
}
-/* Process all environments variables the dynamic linker must recognize.
- Since all of them start with `LD_' we are a bit smarter while finding
- all the entries. */
-extern char **_environ attribute_hidden;
-
-
static void
process_envvars (struct dl_main_state *state)
{
diff --git a/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c b/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c
new file mode 100644
index 0000000000000000..59f6402c547ba590
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/dl-diagnostics-kernel.c
@@ -0,0 +1,77 @@
+/* Print kernel diagnostics data in ld.so. Linux version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dl-diagnostics.h>
+#include <ldsodefs.h>
+#include <sys/utsname.h>
+
+/* Dump the auxiliary vector to standard output. */
+static void
+print_auxv (void)
+{
+ /* See _dl_show_auxv. The code below follows the general output
+ format for diagnostic dumps. */
+ unsigned int index = 0;
+ for (ElfW(auxv_t) *av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av)
+ {
+ _dl_printf ("auxv[0x%x].a_type=0x%lx\n"
+ "auxv[0x%x].a_val=",
+ index, (unsigned long int) av->a_type, index);
+ if (av->a_type == AT_EXECFN
+ || av->a_type == AT_PLATFORM
+ || av->a_type == AT_BASE_PLATFORM)
+ /* The address of the strings is not useful at all, so print
+ the strings themselves. */
+ _dl_diagnostics_print_string ((const char *) av->a_un.a_val);
+ else
+ _dl_printf ("0x%lx", (unsigned long int) av->a_un.a_val);
+ _dl_printf ("\n");
+ ++index;
+ }
+}
+
+/* Print one uname entry. */
+static void
+print_utsname_entry (const char *field, const char *value)
+{
+ _dl_printf ("uname.");
+ _dl_diagnostics_print_labeled_string (field, value);
+}
+
+/* Print information from uname, including the kernel version. */
+static void
+print_uname (void)
+{
+ struct utsname uts;
+ if (__uname (&uts) == 0)
+ {
+ print_utsname_entry ("sysname", uts.sysname);
+ print_utsname_entry ("nodename", uts.nodename);
+ print_utsname_entry ("release", uts.release);
+ print_utsname_entry ("version", uts.version);
+ print_utsname_entry ("machine", uts.machine);
+ print_utsname_entry ("domainname", uts.domainname);
+ }
+}
+
+void
+_dl_diagnostics_kernel (void)
+{
+ print_auxv ();
+ print_uname ();
+}

View File

@@ -0,0 +1,117 @@
commit e4933c8a92ea08eecdf3ab45e7f76c95dc3d20ac
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Mar 2 14:58:05 2021 +0100
x86: Automate generation of PREFERRED_FEATURE_INDEX_1 bitfield
Use a .def file to define the bitfield layout, so that it is possible
to iterate over field members using the preprocessor.
Conflicts:
sysdeps/x86/include/cpu-features.h
(re-did the change from scratch)
sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
(adjusted to the downstream bits)
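To make the mechanism concrete, here is a minimal standalone sketch of the same .def/X-macro technique, with hypothetical feature names rather than the glibc definitions; the real code includes the .def file instead of expanding a list macro:

#include <stdio.h>

/* Stand-in for the .def file: one BIT (name) entry per bitfield
   member.  In glibc the list lives in a separate file that is
   included three times; a list macro keeps this sketch standalone.  */
#define FEATURE_LIST \
  BIT (Alpha)        \
  BIT (Beta)         \
  BIT (Gamma)

/* First expansion: sequential bit indices 0, 1, 2, ...  */
enum
  {
#define BIT(x) _bitindex_##x,
    FEATURE_LIST
#undef BIT
  };

/* Second expansion: the derived bit masks.  */
enum
  {
#define BIT(x) bit_##x = 1u << _bitindex_##x,
    FEATURE_LIST
#undef BIT
  };

int
main (void)
{
  /* Third expansion: iterate over all members, as the diagnostics
     code does when printing the preferred-feature bits.  */
#define BIT(x) printf (#x "=0x%x\n", bit_##x);
  FEATURE_LIST
#undef BIT
  return 0;
}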
diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
new file mode 100644
index 0000000000000000..17a5cc428c1dabea
--- /dev/null
+++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def
@@ -0,0 +1,34 @@
+/* Bits in the PREFERRED_FEATURE_INDEX_1 bitfield of <cpu-features.h>.
+ Copyright (C) 2020-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+BIT (I586)
+BIT (I686)
+BIT (Fast_Rep_String)
+BIT (Fast_Copy_Backward)
+BIT (Fast_Unaligned_Load)
+BIT (Fast_Unaligned_Copy)
+BIT (Slow_BSF)
+BIT (Slow_SSE4_2)
+BIT (AVX_Fast_Unaligned_Load)
+BIT (Prefer_MAP_32BIT_EXEC)
+BIT (Prefer_PMINUB_for_stringop)
+BIT (Prefer_No_VZEROUPPER)
+BIT (Prefer_ERMS)
+BIT (Prefer_FSRM)
+BIT (Prefer_No_AVX512)
+BIT (MathVec_Prefer_No_AVX512)
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index f62be0b9b3746675..f43e22f677b249a9 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -80,40 +80,23 @@ enum
# define HAS_ARCH_FEATURE(name) \
CPU_FEATURE_PREFERRED (name)
-/* PREFERRED_FEATURE_INDEX_1. */
-# define bit_arch_I586 (1u << 0)
-# define bit_arch_I686 (1u << 1)
-# define bit_arch_Fast_Rep_String (1u << 2)
-# define bit_arch_Fast_Copy_Backward (1u << 3)
-# define bit_arch_Fast_Unaligned_Load (1u << 4)
-# define bit_arch_Fast_Unaligned_Copy (1u << 5)
-# define bit_arch_Slow_BSF (1u << 6)
-# define bit_arch_Slow_SSE4_2 (1u << 7)
-# define bit_arch_AVX_Fast_Unaligned_Load (1u << 8)
-# define bit_arch_Prefer_MAP_32BIT_EXEC (1u << 9)
-# define bit_arch_Prefer_PMINUB_for_stringop (1u << 10)
-# define bit_arch_Prefer_No_VZEROUPPER (1u << 11)
-# define bit_arch_Prefer_ERMS (1u << 12)
-# define bit_arch_Prefer_FSRM (1u << 13)
-# define bit_arch_Prefer_No_AVX512 (1u << 14)
-# define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15)
-
-# define index_arch_Fast_Rep_String PREFERRED_FEATURE_INDEX_1
-# define index_arch_Fast_Copy_Backward PREFERRED_FEATURE_INDEX_1
-# define index_arch_Slow_BSF PREFERRED_FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Load PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_PMINUB_for_stringop PREFERRED_FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Copy PREFERRED_FEATURE_INDEX_1
-# define index_arch_I586 PREFERRED_FEATURE_INDEX_1
-# define index_arch_I686 PREFERRED_FEATURE_INDEX_1
-# define index_arch_Slow_SSE4_2 PREFERRED_FEATURE_INDEX_1
-# define index_arch_AVX_Fast_Unaligned_Load PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_MAP_32BIT_EXEC PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_No_VZEROUPPER PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_ERMS PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
-# define index_arch_MathVec_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
-# define index_arch_Prefer_FSRM PREFERRED_FEATURE_INDEX_1
+/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
+ sequentially, then define the bit_arch_* and index_arch_* lookup
+ constants. */
+enum
+ {
+#define BIT(x) _bitindex_arch_##x ,
+#include "cpu-features-preferred_feature_index_1.def"
+#undef BIT
+ };
+enum
+ {
+#define BIT(x) \
+ bit_arch_##x = 1u << _bitindex_arch_##x , \
+ index_arch_##x = PREFERRED_FEATURE_INDEX_1,
+#include "cpu-features-preferred_feature_index_1.def"
+#undef BIT
+ };
/* XCR0 Feature flags. */
# define bit_XMM_state (1u << 1)

View File

@@ -0,0 +1,131 @@
commit 01a5746b6c8a44dc29d33e056b63485075a6a3cc
Author: Florian Weimer <fweimer@redhat.com>
Date: Wed Feb 24 13:12:04 2021 +0100
x86: Add CPU-specific diagnostics to ld.so --list-diagnostics
Conflicts:
sysdeps/x86/dl-diagnostics-cpu.c
(reworked due to struct differences, different knobs
downstream)
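A detail worth noting in the diff below: the printing function ends with a _Static_assert that ties the last printed field to the size of struct cpu_features, so appending a member without updating the printer breaks the build. A minimal sketch of the idiom, using a hypothetical struct:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct cpu_features.  */
struct features
{
  uint32_t family;
  uint32_t model;
  uint64_t threshold;		/* Must stay the last member.  */
};

static void
print_features (const struct features *f)
{
  printf ("family=0x%x\n", (unsigned int) f->family);
  printf ("model=0x%x\n", (unsigned int) f->model);
  printf ("threshold=0x%llx\n", (unsigned long long) f->threshold);
  /* Fails to compile if a member is added after 'threshold' without
     updating the printing code above.  */
  _Static_assert (offsetof (struct features, threshold)
		  + sizeof (((struct features *) 0)->threshold)
		  == sizeof (struct features),
		  "last features field has been printed");
}

int
main (void)
{
  struct features f = { 6, 85, 0x100000 };
  print_features (&f);
  return 0;
}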
diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c
new file mode 100644
index 0000000000000000..0ba286a828b69937
--- /dev/null
+++ b/sysdeps/x86/dl-diagnostics-cpu.c
@@ -0,0 +1,101 @@
+/* Print CPU diagnostics data in ld.so. x86 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dl-diagnostics.h>
+#include <ldsodefs.h>
+
+static void
+print_cpu_features_value (const char *label, uint64_t value)
+{
+ _dl_printf ("x86.cpu_features.");
+ _dl_diagnostics_print_labeled_value (label, value);
+}
+
+static void
+print_cpu_feature_internal (unsigned int index, const char *kind,
+ unsigned int reg, uint32_t value)
+{
+ _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n",
+ index, kind, reg, value);
+}
+
+static void
+print_cpu_feature_preferred (const char *label, unsigned int flag)
+{
+ _dl_printf ("x86.cpu_features.preferred.%s=0x%x\n", label, flag);
+}
+
+void
+_dl_diagnostics_cpu (void)
+{
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ print_cpu_features_value ("basic.kind", cpu_features->basic.kind);
+ print_cpu_features_value ("basic.max_cpuid", cpu_features->basic.max_cpuid);
+ print_cpu_features_value ("basic.family", cpu_features->basic.family);
+ print_cpu_features_value ("basic.model", cpu_features->basic.model);
+ print_cpu_features_value ("basic.stepping", cpu_features->basic.stepping);
+
+ for (unsigned int index = 0; index < COMMON_CPUID_INDEX_MAX; ++index)
+ {
+ /* Downstream, these constants are not part of the ABI yet, so
+ analysis needs to take the precise glibc version into
+ account. */
+ print_cpu_feature_internal
+ (index, "cpuid", 0, cpu_features->features[index].cpuid.eax);
+ print_cpu_feature_internal
+ (index, "cpuid", 1, cpu_features->features[index].cpuid.ebx);
+ print_cpu_feature_internal
+ (index, "cpuid", 2, cpu_features->features[index].cpuid.ecx);
+ print_cpu_feature_internal
+ (index, "cpuid", 3, cpu_features->features[index].cpuid.edx);
+ print_cpu_feature_internal
+ (index, "usable", 0, cpu_features->features[index].usable.eax);
+ print_cpu_feature_internal
+ (index, "usable", 1, cpu_features->features[index].usable.ebx);
+ print_cpu_feature_internal
+ (index, "usable", 2, cpu_features->features[index].usable.ecx);
+ print_cpu_feature_internal
+ (index, "usable", 3, cpu_features->features[index].usable.edx);
+ }
+
+ /* The preferred indicators are not part of the ABI and need to be
+ translated. */
+#define BIT(x) \
+ print_cpu_feature_preferred (#x, CPU_FEATURE_PREFERRED_P (cpu_features, x));
+#include "cpu-features-preferred_feature_index_1.def"
+#undef BIT
+
+ print_cpu_features_value ("xsave_state_size",
+ cpu_features->xsave_state_size);
+ print_cpu_features_value ("xsave_state_full_size",
+ cpu_features->xsave_state_full_size);
+ print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size);
+ print_cpu_features_value ("shared_cache_size",
+ cpu_features->shared_cache_size);
+ print_cpu_features_value ("non_temporal_threshold",
+ cpu_features->non_temporal_threshold);
+ print_cpu_features_value ("rep_movsb_threshold",
+ cpu_features->rep_movsb_threshold);
+ print_cpu_features_value ("rep_stosb_threshold",
+ cpu_features->rep_stosb_threshold);
+ _Static_assert (offsetof (struct cpu_features, rep_stosb_threshold)
+ + sizeof (cpu_features->rep_stosb_threshold)
+ == sizeof (*cpu_features),
+ "last cpu_features field has been printed");
+}
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index f43e22f677b249a9..536643b209425198 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -107,6 +107,8 @@ enum
# define bit_XTILECFG_state (1u << 17)
# define bit_XTILEDATA_state (1u << 18)
+/* NB: When adding new fields, update sysdeps/x86/dl-diagnostics-cpu.c
+ to print them. */
struct cpu_features
{
struct cpu_features_basic basic;

View File

@@ -0,0 +1,255 @@
commit c1cb2deeca1a85c6fc5bd41b90816d48a95bc434
Author: Florian Weimer <fweimer@redhat.com>
Date: Sun Dec 5 11:28:34 2021 +0100
elf: execve statically linked programs instead of crashing [BZ #28648]
Programs without dynamic dependencies and without a program
interpreter are now run via execve.
Previously, the dynamic linker either crashed while attempting to
read a non-existing dynamic segment (looking for DT_AUDIT/DT_DEPAUDIT
data), or the self-relocation in the static PIE executable crashed
because the outer dynamic linker had already applied RELRO protection.
<dl-execve.h> is needed because execve is not available in the
dynamic loader on Hurd.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Conflicts:
elf/Makefile
(some missing backports)
elf/rtld.c
(missing rework of ld.so self-relocation downstream,
always print error as a number due to missing
strerrorname_np, also fix errcode/errno glitch)
sysdeps/unix/sysv/linux/dl-execve.h
(missing INTERNAL_SYSCALL_CALL refactoring to Linux-like
calling convention)
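The static-executable test the patch adds (no DT_NEEDED entry and no PT_INTERP program header) can also be reproduced outside the loader. Below is a rough standalone checker for the PT_INTERP half, assuming a native 64-bit ELF for brevity; it is illustrative only and not part of the patch:

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Return 1 if PATH has a PT_INTERP program header (a dynamically
   linked executable naming an interpreter), 0 if not, -1 on error.
   Handles only native 64-bit ELF files, for brevity.  */
static int
has_interp (const char *path)
{
  int fd = open (path, O_RDONLY);
  if (fd < 0)
    return -1;
  Elf64_Ehdr ehdr;
  if (read (fd, &ehdr, sizeof ehdr) != (ssize_t) sizeof ehdr
      || memcmp (ehdr.e_ident, ELFMAG, SELFMAG) != 0
      || ehdr.e_ident[EI_CLASS] != ELFCLASS64)
    {
      close (fd);
      return -1;
    }
  int result = 0;
  for (unsigned int i = 0; i < ehdr.e_phnum && result == 0; ++i)
    {
      Elf64_Phdr phdr;
      if (pread (fd, &phdr, sizeof phdr,
		 ehdr.e_phoff + (off_t) i * ehdr.e_phentsize)
	  != (ssize_t) sizeof phdr)
	result = -1;
      else if (phdr.p_type == PT_INTERP)
	result = 1;
    }
  close (fd);
  return result;
}

int
main (int argc, char **argv)
{
  if (argc != 2)
    {
      fprintf (stderr, "usage: %s elf-file\n", argv[0]);
      return 2;
    }
  int r = has_interp (argv[1]);
  if (r < 0)
    {
      perror (argv[1]);
      return 2;
    }
  puts (r ? "has PT_INTERP (dynamically linked)"
	  : "no PT_INTERP (static or static PIE)");
  return 0;
}

Run against /bin/sh this should report PT_INTERP; run against a statically linked binary such as a static PIE it typically reports none.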
diff --git a/elf/Makefile b/elf/Makefile
index d246f1c0d9e019fd..b3e8ab2792608de7 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -214,7 +214,8 @@ tests += restest1 preloadtest loadfail multiload origtest resolvfail \
tst-tls-ie tst-tls-ie-dlmopen \
argv0test \
tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \
- tst-tls20 tst-tls21
+ tst-tls20 tst-tls21 \
+ tst-rtld-run-static \
# reldep9
tests-internal += loadtest unload unload2 circleload1 \
neededtest neededtest2 neededtest3 neededtest4 \
@@ -1917,3 +1918,5 @@ $(objpfx)tst-tls20.out: $(objpfx)tst-tls20mod-bad.so \
$(objpfx)tst-tls21: $(libdl) $(shared-thread-library)
$(objpfx)tst-tls21.out: $(objpfx)tst-tls21mod.so
$(objpfx)tst-tls21mod.so: $(tst-tls-many-dynamic-modules:%=$(objpfx)%.so)
+
+$(objpfx)tst-rtld-run-static.out: $(objpfx)/ldconfig
diff --git a/elf/rtld.c b/elf/rtld.c
index d14c388f548d6d51..461d8c114a875a9b 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -48,6 +48,7 @@
#include <dl-main.h>
#include <gnu/lib-names.h>
#include <dl-tunables.h>
+#include <dl-execve.h>
#include <assert.h>
@@ -1114,6 +1115,40 @@ load_audit_modules (struct link_map *main_map, struct audit_list *audit_list)
}
}
+/* Check if the executable is not actually dynamically linked, and
+ invoke it directly in that case. */
+static void
+rtld_chain_load (struct link_map *main_map, char *argv0)
+{
+ /* The dynamic loader was run against itself. */
+ const char *rtld_soname
+ = ((const char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB])
+ + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_val);
+ if (main_map->l_info[DT_SONAME] != NULL
+ && strcmp (rtld_soname,
+ ((const char *) D_PTR (main_map, l_info[DT_STRTAB])
+ + main_map->l_info[DT_SONAME]->d_un.d_val)) == 0)
+ _dl_fatal_printf ("%s: loader cannot load itself\n", rtld_soname);
+
+ /* With DT_NEEDED dependencies, the executable is dynamically
+ linked. */
+ if (__glibc_unlikely (main_map->l_info[DT_NEEDED] != NULL))
+ return;
+
+ /* If the executable has program interpreter, it is dynamically
+ linked. */
+ for (size_t i = 0; i < main_map->l_phnum; ++i)
+ if (main_map->l_phdr[i].p_type == PT_INTERP)
+ return;
+
+ const char *pathname = _dl_argv[0];
+ if (argv0 != NULL)
+ _dl_argv[0] = argv0;
+ int errcode = __rtld_execve (pathname, _dl_argv, _environ);
+ _dl_fatal_printf ("%s: cannot execute %s: %d\n",
+ rtld_soname, pathname, errcode);
+}
+
static void
dl_main (const ElfW(Phdr) *phdr,
ElfW(Word) phnum,
@@ -1384,14 +1419,8 @@ dl_main (const ElfW(Phdr) *phdr,
/* Now the map for the main executable is available. */
main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
- if (__glibc_likely (state.mode == rtld_mode_normal)
- && GL(dl_rtld_map).l_info[DT_SONAME] != NULL
- && main_map->l_info[DT_SONAME] != NULL
- && strcmp ((const char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB])
- + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_val,
- (const char *) D_PTR (main_map, l_info[DT_STRTAB])
- + main_map->l_info[DT_SONAME]->d_un.d_val) == 0)
- _dl_fatal_printf ("loader cannot load itself\n");
+ if (__glibc_likely (state.mode == rtld_mode_normal))
+ rtld_chain_load (main_map, argv0);
phdr = main_map->l_phdr;
phnum = main_map->l_phnum;
diff --git a/elf/tst-rtld-run-static.c b/elf/tst-rtld-run-static.c
new file mode 100644
index 0000000000000000..7281093504b675c4
--- /dev/null
+++ b/elf/tst-rtld-run-static.c
@@ -0,0 +1,62 @@
+/* Test running statically linked programs using ld.so.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/support.h>
+#include <support/capture_subprocess.h>
+#include <string.h>
+#include <stdlib.h>
+
+static int
+do_test (void)
+{
+ char *ldconfig_path = xasprintf ("%s/elf/ldconfig", support_objdir_root);
+
+ {
+ char *argv[] = { (char *) "ld.so", ldconfig_path, (char *) "--help", NULL };
+ struct support_capture_subprocess cap
+ = support_capture_subprogram (support_objdir_elf_ldso, argv);
+ support_capture_subprocess_check (&cap, "no --argv0", 0, sc_allow_stdout);
+ puts ("info: output without --argv0:");
+ puts (cap.out.buffer);
+ TEST_VERIFY (strstr (cap.out.buffer, "Usage: ldconfig [OPTION...]\n")
+ == cap.out.buffer);
+ support_capture_subprocess_free (&cap);
+ }
+
+ {
+ char *argv[] =
+ {
+ (char *) "ld.so", (char *) "--argv0", (char *) "ldconfig-argv0",
+ ldconfig_path, (char *) "--help", NULL
+ };
+ struct support_capture_subprocess cap
+ = support_capture_subprogram (support_objdir_elf_ldso, argv);
+ support_capture_subprocess_check (&cap, "with --argv0", 0, sc_allow_stdout);
+ puts ("info: output with --argv0:");
+ puts (cap.out.buffer);
+ TEST_VERIFY (strstr (cap.out.buffer, "Usage: ldconfig-argv0 [OPTION...]\n")
+ == cap.out.buffer);
+ support_capture_subprocess_free (&cap);
+ }
+
+ free (ldconfig_path);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/generic/dl-execve.h b/sysdeps/generic/dl-execve.h
new file mode 100644
index 0000000000000000..5fd097df69e1770c
--- /dev/null
+++ b/sysdeps/generic/dl-execve.h
@@ -0,0 +1,25 @@
+/* execve for the dynamic linker. Generic stub version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+
+static int
+__rtld_execve (const char *path, char *const *argv, char *const *envp)
+{
+ return ENOSYS;
+}
diff --git a/sysdeps/unix/sysv/linux/dl-execve.h b/sysdeps/unix/sysv/linux/dl-execve.h
new file mode 100644
index 0000000000000000..9ec6539286bb0589
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/dl-execve.h
@@ -0,0 +1,30 @@
+/* execve for the dynamic linker. Linux version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+
+static inline int
+__rtld_execve (const char *path, char *const *argv, char *const *envp)
+{
+ INTERNAL_SYSCALL_DECL (err);
+ long int r = INTERNAL_SYSCALL_CALL (execve, err, path, argv, envp);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return INTERNAL_SYSCALL_ERRNO (r, err);
+ else
+ return 0;
+}

View File

@@ -0,0 +1,41 @@
commit 2e75604f8337fa4332977f72a8f6726309679edf
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Dec 10 16:06:36 2021 +0100
elf: Install a symbolic link to ld.so as /usr/bin/ld.so
This makes ld.so features such as --preload, --audit,
and --list-diagnostics more accessible to end users because they
do not need to know the ABI name of the dynamic loader.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Conflicts:
elf/Makefile
(versioned shared objects downstream)
diff --git a/elf/Makefile b/elf/Makefile
index b3e8ab2792608de7..c552aff350c2faac 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -99,7 +99,7 @@ endif
ifeq (yes,$(build-shared))
extra-objs = $(all-rtld-routines:%=%.os) soinit.os sofini.os interp.os
generated += librtld.os dl-allobjs.os ld.so ldd
-install-others = $(inst_rtlddir)/$(rtld-installed-name)
+install-others = $(inst_rtlddir)/$(rtld-installed-name) $(inst_bindir)/ld.so
install-bin-script = ldd
endif
@@ -622,6 +622,11 @@ $(inst_rtlddir)/$(rtld-installed-name): \
$(make-target-directory)
$(make-shlib-link)
+# Creates the relative /usr/bin/ld.so symbolic link.
+$(inst_bindir)/ld.so: $(inst_rtlddir)/$(rtld-installed-name)
+ $(make-target-directory)
+ $(make-link)
+
# Special target called by parent to install just the dynamic linker.
.PHONY: ldso_install
ldso_install: $(inst_rtlddir)/$(rtld-installed-name)

View File

@@ -1,6 +1,6 @@
%define glibcsrcdir glibc-2.28
%define glibcversion 2.28
-%define glibcrelease 174%{?dist}
+%define glibcrelease 180%{?dist}
# Pre-release tarballs are pulled in from git using a command that is
# effectively:
#
@@ -783,6 +783,43 @@ Patch605: glibc-rh1937515.patch
Patch606: glibc-rh1934162-1.patch
Patch607: glibc-rh1934162-2.patch
Patch608: glibc-rh2000374.patch
+Patch609: glibc-rh1991001-1.patch
+Patch610: glibc-rh1991001-2.patch
+Patch611: glibc-rh1991001-3.patch
+Patch612: glibc-rh1991001-4.patch
+Patch613: glibc-rh1991001-5.patch
+Patch614: glibc-rh1991001-6.patch
+Patch615: glibc-rh1991001-7.patch
+Patch616: glibc-rh1991001-8.patch
+Patch617: glibc-rh1991001-9.patch
+Patch618: glibc-rh1991001-10.patch
+Patch619: glibc-rh1991001-11.patch
+Patch620: glibc-rh1991001-12.patch
+Patch621: glibc-rh1991001-13.patch
+Patch622: glibc-rh1991001-14.patch
+Patch623: glibc-rh1991001-15.patch
+Patch624: glibc-rh1991001-16.patch
+Patch625: glibc-rh1991001-17.patch
+Patch626: glibc-rh1991001-18.patch
+Patch627: glibc-rh1991001-19.patch
+Patch628: glibc-rh1991001-20.patch
+Patch629: glibc-rh1991001-21.patch
+Patch630: glibc-rh1991001-22.patch
+Patch631: glibc-rh1929928-1.patch
+Patch632: glibc-rh1929928-2.patch
+Patch633: glibc-rh1929928-3.patch
+Patch634: glibc-rh1929928-4.patch
+Patch635: glibc-rh1929928-5.patch
+Patch636: glibc-rh1984802-1.patch
+Patch637: glibc-rh1984802-2.patch
+Patch638: glibc-rh1984802-3.patch
+Patch639: glibc-rh2023420-1.patch
+Patch640: glibc-rh2023420-2.patch
+Patch641: glibc-rh2023420-3.patch
+Patch642: glibc-rh2023420-4.patch
+Patch643: glibc-rh2023420-5.patch
+Patch644: glibc-rh2023420-6.patch
+Patch645: glibc-rh2023420-7.patch
##############################################################################
# Continued list of core "glibc" package information:
@@ -1851,6 +1888,7 @@ cp benchtests/scripts/benchout.schema.json %{glibc_sysroot}%{_prefix}/libexec/gl
cp benchtests/scripts/compare_bench.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/
cp benchtests/scripts/import_bench.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/
cp benchtests/scripts/validate_benchout.py %{glibc_sysroot}%{_prefix}/libexec/glibc-benchtests/
+%endif
%if 0%{?_enable_debug_packages}
# The #line directives gperf generates do not give the proper
@@ -2217,8 +2255,8 @@ cat > utils.filelist <<EOF
%if %{without bootstrap}
%{_prefix}/bin/memusage
%{_prefix}/bin/memusagestat
+%endif
%{_prefix}/bin/mtrace
-%endif
%{_prefix}/bin/pcprofiledump
%{_prefix}/bin/xtrace
EOF
@@ -2250,6 +2288,7 @@ grep '/libnss_[a-z]*\.so$' master.filelist > nss-devel.filelist
grep '/libnsl-[0-9.]*.so$' master.filelist > libnsl.filelist
test $(wc -l < libnsl.filelist) -eq 1
+%if %{with benchtests}
###############################################################################
# glibc-benchtests
###############################################################################
@@ -2360,7 +2399,14 @@ exclude_common_dirs()
for d in $(echo $exclude_dirs | sed 's/ /\n/g'); do
sed -i "\|^%%dir $d/\?$|d" $1
done
+# Special kludge: /usr/bin/ld.so is a symbolic link, so debuggers
+# do not need it to locate debugging information (they can use
+# the real path instead).
+sed -i '\,^/usr/lib/debug/usr/bin/ld\.so\.debug$,d' $1
}
+
+# The file does not exist on all architectures.
+rm -f %{glibc_sysroot}/usr/lib/debug/usr/bin/ld.so.debug
%ifarch %{debuginfocommonarches}
exclude_common_dirs debuginfocommon.filelist
@@ -2763,6 +2809,25 @@ fi
%files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
%changelog
+* Mon Dec 13 2021 Florian Weimer <fweimer@redhat.com> - 2.28-180
+- Do not install /usr/lib/debug/usr/bin/ld.so.debug (#2023420)
+
+* Fri Dec 10 2021 Florian Weimer <fweimer@redhat.com> - 2.28-179
+- Add /usr/bin/ld.so --list-diagnostics (#2023420)
+
+* Fri Dec 10 2021 Carlos O'Donell <carlos@redhat.com> - 2.28-178
+- Preliminary support for new IBM zSeries hardware (#1984802)
+
+* Fri Dec 10 2021 Carlos O'Donell <carlos@redhat.com> - 2.28-177
+- Fix --with and --without builds for benchtests and bootstrap (#2020989)
+
+* Wed Dec 1 2021 Florian Weimer <fweimer@redhat.com> - 2.28-176
+- A64FX memcpy/memmove/memset optimizations (#1929928)
+
+* Tue Nov 30 2021 Florian Weimer <fweimer@redhat.com> - 2.28-175
+- Fix dl-tls.c assert failure with pthread_create & dlopen (#1991001)
+- Fix x86_64 TLS lazy binding with auditors (#1950056)
+
* Thu Nov 25 2021 Arjun Shankar <arjun@redhat.com> - 2.28-174
- Introduce new glibc-doc.noarch subpackage (#2021671)
- Move the reference manual info pages from glibc-devel to glibc-doc