From 45033701c6f05ac47509b6eb500d3c78f0be2f18 Mon Sep 17 00:00:00 2001 From: Patsy Griffin Date: Thu, 11 Jul 2024 16:01:24 -0400 Subject: [PATCH] elf: Avoid some free (NULL) calls in _dl_update_slotinfo elf: Support recursive use of dynamic TLS in interposed malloc Resolves: RHEL-39992 --- glibc-RHEL-39992-1.patch | 43 ++++ glibc-RHEL-39992-2.patch | 499 +++++++++++++++++++++++++++++++++++++++ glibc.spec | 8 +- 3 files changed, 549 insertions(+), 1 deletion(-) create mode 100644 glibc-RHEL-39992-1.patch create mode 100644 glibc-RHEL-39992-2.patch diff --git a/glibc-RHEL-39992-1.patch b/glibc-RHEL-39992-1.patch new file mode 100644 index 0000000..040a499 --- /dev/null +++ b/glibc-RHEL-39992-1.patch @@ -0,0 +1,43 @@ +commit afe42e935b3ee97bac9a7064157587777259c60e +Author: Florian Weimer +Date: Mon Jun 3 10:49:40 2024 +0200 + + elf: Avoid some free (NULL) calls in _dl_update_slotinfo + + This has been confirmed to work around some interposed mallocs. Here + is a discussion of the impact test ust/libc-wrapper/test_libc-wrapper + in lttng-tools: + + New TLS usage in libgcc_s.so.1, compatibility impact + + + Reportedly, this patch also papers over a similar issue when tcmalloc + 2.9.1 is not compiled with -ftls-model=initial-exec. Of course the + goal really should be to compile mallocs with the initial-exec TLS + model, but this commit appears to be a useful interim workaround. + + Fixes commit d2123d68275acc0f061e73d5f86ca504e0d5a344 ("elf: Fix slow + tls access after dlopen [BZ #19924]"). + + Reviewed-by: Carlos O'Donell + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 7b3dd9ab60..670dbc42fc 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -819,7 +819,14 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen) + dtv entry free it. Note: this is not AS-safe. */ + /* XXX Ideally we will at some point create a memory + pool. */ +- free (dtv[modid].pointer.to_free); ++ /* Avoid calling free on a null pointer. Some mallocs ++ incorrectly use dynamic TLS, and depending on how the ++ free function was compiled, it could call ++ __tls_get_addr before the null pointer check in the ++ free implementation. Checking here papers over at ++ least some dynamic TLS usage by interposed mallocs. */ ++ if (dtv[modid].pointer.to_free != NULL) ++ free (dtv[modid].pointer.to_free); + dtv[modid].pointer.val = TLS_DTV_UNALLOCATED; + dtv[modid].pointer.to_free = NULL; + diff --git a/glibc-RHEL-39992-2.patch b/glibc-RHEL-39992-2.patch new file mode 100644 index 0000000..df021c9 --- /dev/null +++ b/glibc-RHEL-39992-2.patch @@ -0,0 +1,499 @@ +commit 018f0fc3b818d4d1460a4e2384c24802504b1d20 +Author: Florian Weimer +Date: Mon Jul 1 17:42:04 2024 +0200 + + elf: Support recursive use of dynamic TLS in interposed malloc + + It turns out that quite a few applications use bundled mallocs that + have been built to use global-dynamic TLS (instead of the recommended + initial-exec TLS). The previous workaround from + commit afe42e935b3ee97bac9a7064157587777259c60e ("elf: Avoid some + free (NULL) calls in _dl_update_slotinfo") does not fix all + encountered cases unfortunatelly. + + This change avoids the TLS generation update for recursive use + of TLS from a malloc that was called during a TLS update. This + is possible because an interposed malloc has a fixed module ID and + TLS slot. (It cannot be unloaded.) If an initially-loaded module ID + is encountered in __tls_get_addr and the dynamic linker is already + in the middle of a TLS update, use the outdated DTV, thus avoiding + another call into malloc. It's still necessary to update the + DTV to the most recent generation, to get out of the slow path, + which is why the check for recursion is needed. + + The bookkeeping is done using a global counter instead of per-thread + flag because TLS access in the dynamic linker is tricky. + + All this will go away once the dynamic linker stops using malloc + for TLS, likely as part of a change that pre-allocates all TLS + during pthread_create/dlopen. + + Fixes commit d2123d68275acc0f061e73d5f86ca504e0d5a344 ("elf: Fix slow + tls access after dlopen [BZ #19924]"). + + Reviewed-by: Szabolcs Nagy + + Reworked for RHEL by: Patsy Griffin + +diff -Nrup a/elf/Makefile b/elf/Makefile +--- a/elf/Makefile 2024-07-09 22:06:30.237752048 -0400 ++++ b/elf/Makefile 2024-07-10 14:09:02.996759220 -0400 +@@ -423,6 +423,7 @@ tests += \ + tst-nodeps2 \ + tst-noload \ + tst-null-argv \ ++ tst-recursive-tls \ + tst-relsort1 \ + tst-ro-dynamic \ + tst-rtld-run-static \ +@@ -783,6 +784,23 @@ modules-names = \ + tst-nodeps1-mod \ + tst-nodeps2-mod \ + tst-null-argv-lib \ ++ tst-recursive-tlsmallocmod \ ++ tst-recursive-tlsmod0 \ ++ tst-recursive-tlsmod1 \ ++ tst-recursive-tlsmod2 \ ++ tst-recursive-tlsmod3 \ ++ tst-recursive-tlsmod4 \ ++ tst-recursive-tlsmod5 \ ++ tst-recursive-tlsmod6 \ ++ tst-recursive-tlsmod7 \ ++ tst-recursive-tlsmod8 \ ++ tst-recursive-tlsmod9 \ ++ tst-recursive-tlsmod10 \ ++ tst-recursive-tlsmod11 \ ++ tst-recursive-tlsmod12 \ ++ tst-recursive-tlsmod13 \ ++ tst-recursive-tlsmod14 \ ++ tst-recursive-tlsmod15 \ + tst-relsort1mod1 \ + tst-relsort1mod2 \ + tst-ro-dynamic-mod \ +@@ -2725,3 +2743,11 @@ CFLAGS-tst-tlsgap-mod0.c += -mtls-dialec + CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 + CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 + endif ++ ++$(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so ++# More objects than DTV_SURPLUS, to trigger DTV reallocation. ++$(objpfx)tst-recursive-tls.out: \ ++ $(patsubst %,$(objpfx)tst-recursive-tlsmod%.so, \ ++ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) ++$(objpfx)tst-recursive-tlsmod%.os: tst-recursive-tlsmodN.c ++ $(compile-command.c) -DVAR=thread_$* -DFUNC=get_threadvar_$* +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index 670dbc42fc..3d221273f1 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -75,6 +75,31 @@ + /* Default for dl_tls_static_optional. */ + #define OPTIONAL_TLS 512 + ++/* Used to count the number of threads currently executing dynamic TLS ++ updates. Used to avoid recursive malloc calls in __tls_get_addr ++ for an interposed malloc that uses global-dynamic TLS (which is not ++ recommended); see _dl_tls_allocate_active checks. This could be a ++ per-thread flag, but would need TLS access in the dynamic linker. */ ++unsigned int _dl_tls_threads_in_update; ++ ++static inline void ++_dl_tls_allocate_begin (void) ++{ ++ atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1); ++} ++ ++static inline void ++_dl_tls_allocate_end (void) ++{ ++ atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1); ++} ++ ++static inline bool ++_dl_tls_allocate_active (void) ++{ ++ return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0; ++} ++ + /* Compute the static TLS surplus based on the namespace count and the + TLS space that can be used for optimizations. */ + static inline int +@@ -425,12 +450,18 @@ _dl_allocate_tls_storage (void) + size += TLS_PRE_TCB_SIZE; + #endif + +- /* Perform the allocation. Reserve space for the required alignment +- and the pointer to the original allocation. */ ++ /* Reserve space for the required alignment and the pointer to the ++ original allocation. */ + size_t alignment = GLRO (dl_tls_static_align); ++ ++ /* Perform the allocation. */ ++ _dl_tls_allocate_begin (); + void *allocated = malloc (size + alignment + sizeof (void *)); + if (__glibc_unlikely (allocated == NULL)) +- return NULL; ++ { ++ _dl_tls_allocate_end (); ++ return NULL; ++ } + + /* Perform alignment and allocate the DTV. */ + #if TLS_TCB_AT_TP +@@ -466,6 +497,8 @@ _dl_allocate_tls_storage (void) + result = allocate_dtv (result); + if (result == NULL) + free (allocated); ++ ++ _dl_tls_allocate_end (); + return result; + } + +@@ -483,6 +516,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) + size_t newsize = max_modid + DTV_SURPLUS; + size_t oldsize = dtv[-1].counter; + ++ _dl_tls_allocate_begin (); + if (dtv == GL(dl_initial_dtv)) + { + /* This is the initial dtv that was either statically allocated in +@@ -502,6 +536,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) + if (newp == NULL) + oom (); + } ++ _dl_tls_allocate_end (); + + newp[0].counter = newsize; + +@@ -676,7 +711,9 @@ allocate_dtv_entry (size_t alignment, size_t size) + if (powerof2 (alignment) && alignment <= _Alignof (max_align_t)) + { + /* The alignment is supported by malloc. */ ++ _dl_tls_allocate_begin (); + void *ptr = malloc (size); ++ _dl_tls_allocate_end (); + return (struct dtv_pointer) { ptr, ptr }; + } + +@@ -688,7 +725,10 @@ allocate_dtv_entry (size_t alignment, size_t size) + + /* Perform the allocation. This is the pointer we need to free + later. */ ++ _dl_tls_allocate_begin (); + void *start = malloc (alloc_size); ++ _dl_tls_allocate_end (); ++ + if (start == NULL) + return (struct dtv_pointer) {}; + +@@ -826,7 +866,11 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen) + free implementation. Checking here papers over at + least some dynamic TLS usage by interposed mallocs. */ + if (dtv[modid].pointer.to_free != NULL) +- free (dtv[modid].pointer.to_free); ++ { ++ _dl_tls_allocate_begin (); ++ free (dtv[modid].pointer.to_free); ++ _dl_tls_allocate_end (); ++ } + dtv[modid].pointer.val = TLS_DTV_UNALLOCATED; + dtv[modid].pointer.to_free = NULL; + +@@ -956,10 +1000,22 @@ __tls_get_addr (GET_ADDR_ARGS) + size_t gen = atomic_load_relaxed (&GL(dl_tls_generation)); + if (__glibc_unlikely (dtv[0].counter != gen)) + { +- /* Update DTV up to the global generation, see CONCURRENCY NOTES +- in _dl_update_slotinfo. */ +- gen = atomic_load_acquire (&GL(dl_tls_generation)); +- return update_get_addr (GET_ADDR_PARAM, gen); ++ if (_dl_tls_allocate_active () ++ && GET_ADDR_MODULE < _dl_tls_initial_modid_limit) ++ /* This is a reentrant __tls_get_addr call, but we can ++ satisfy it because it's an initially-loaded module ID. ++ These TLS slotinfo slots do not change, so the ++ out-of-date generation counter does not matter. However, ++ if not in a TLS update, still update_get_addr below, to ++ get off the slow path eventually. */ ++ ; ++ else ++ { ++ /* Update DTV up to the global generation, see CONCURRENCY NOTES ++ in _dl_update_slotinfo. */ ++ gen = atomic_load_acquire (&GL(dl_tls_generation)); ++ return update_get_addr (GET_ADDR_PARAM, gen); ++ } + } + + void *p = dtv[GET_ADDR_MODULE].pointer.val; +@@ -969,7 +1025,7 @@ __tls_get_addr (GET_ADDR_ARGS) + + return (char *) p + GET_ADDR_OFFSET; + } +-#endif ++#endif /* SHARED */ + + + /* Look up the module's TLS block as for __tls_get_addr, +@@ -1018,6 +1074,25 @@ _dl_tls_get_addr_soft (struct link_map *l) + return data; + } + ++size_t _dl_tls_initial_modid_limit; ++ ++void ++_dl_tls_initial_modid_limit_setup (void) ++{ ++ struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list); ++ size_t idx; ++ for (idx = 0; idx < listp->len; ++idx) ++ { ++ struct link_map *l = listp->slotinfo[idx].map; ++ if (l == NULL ++ /* The object can be unloaded, so its modid can be ++ reassociated. */ ++ || !(l->l_type == lt_executable || l->l_type == lt_library)) ++ break; ++ } ++ _dl_tls_initial_modid_limit = idx; ++} ++ + + void + _dl_add_to_slotinfo (struct link_map *l, bool do_add) +@@ -1050,9 +1125,11 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) + the first slot. */ + assert (idx == 0); + ++ _dl_tls_allocate_begin (); + listp = (struct dtv_slotinfo_list *) + malloc (sizeof (struct dtv_slotinfo_list) + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); ++ _dl_tls_allocate_end (); + if (listp == NULL) + { + /* We ran out of memory while resizing the dtv slotinfo list. */ +diff --git a/elf/rtld.c b/elf/rtld.c +index e9525ea987..6352ba76c5 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -788,6 +788,8 @@ init_tls (size_t naudit) + _dl_fatal_printf ("\ + cannot allocate TLS data structures for initial thread\n"); + ++ _dl_tls_initial_modid_limit_setup (); ++ + /* Store for detection of the special case by __tls_get_addr + so it knows not to pass this dtv to the normal realloc. */ + GL(dl_initial_dtv) = GET_DTV (tcbp); +diff --git a/elf/tst-recursive-tls.c b/elf/tst-recursive-tls.c +new file mode 100644 +index 0000000000..716d1f783a +--- /dev/null ++++ b/elf/tst-recursive-tls.c +@@ -0,0 +1,60 @@ ++/* Test with interposed malloc with dynamic TLS. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++/* Defined in tst-recursive-tlsmallocmod.so. */ ++extern __thread unsigned int malloc_subsytem_counter; ++ ++static int ++do_test (void) ++{ ++ /* 16 is large enough to exercise the DTV resizing case. */ ++ void *handles[16]; ++ ++ for (unsigned int i = 0; i < array_length (handles); ++i) ++ { ++ /* Re-use the TLS slot for module 0. */ ++ if (i > 0) ++ xdlclose (handles[0]); ++ ++ char soname[30]; ++ snprintf (soname, sizeof (soname), "tst-recursive-tlsmod%u.so", i); ++ handles[i] = xdlopen (soname, RTLD_NOW); ++ ++ if (i > 0) ++ { ++ handles[0] = xdlopen ("tst-recursive-tlsmod0.so", RTLD_NOW); ++ int (*fptr) (void) = xdlsym (handles[0], "get_threadvar_0"); ++ /* May trigger TLS storage allocation using malloc. */ ++ TEST_COMPARE (fptr (), 0); ++ } ++ } ++ ++ for (unsigned int i = 0; i < array_length (handles); ++i) ++ xdlclose (handles[i]); ++ ++ printf ("info: malloc subsystem calls: %u\n", malloc_subsytem_counter); ++ TEST_VERIFY (malloc_subsytem_counter > 0); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-recursive-tlsmallocmod.c b/elf/tst-recursive-tlsmallocmod.c +new file mode 100644 +index 0000000000..c24e9945d1 +--- /dev/null ++++ b/elf/tst-recursive-tlsmallocmod.c +@@ -0,0 +1,64 @@ ++/* Interposed malloc with dynamic TLS. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++__thread unsigned int malloc_subsytem_counter; ++ ++static __typeof (malloc) *malloc_fptr; ++static __typeof (free) *free_fptr; ++static __typeof (calloc) *calloc_fptr; ++static __typeof (realloc) *realloc_fptr; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ malloc_fptr = dlsym (RTLD_NEXT, "malloc"); ++ free_fptr = dlsym (RTLD_NEXT, "free"); ++ calloc_fptr = dlsym (RTLD_NEXT, "calloc"); ++ realloc_fptr = dlsym (RTLD_NEXT, "realloc"); ++} ++ ++void * ++malloc (size_t size) ++{ ++ ++malloc_subsytem_counter; ++ return malloc_fptr (size); ++} ++ ++void ++free (void *ptr) ++{ ++ ++malloc_subsytem_counter; ++ return free_fptr (ptr); ++} ++ ++void * ++calloc (size_t a, size_t b) ++{ ++ ++malloc_subsytem_counter; ++ return calloc_fptr (a, b); ++} ++ ++void * ++realloc (void *ptr, size_t size) ++{ ++ ++malloc_subsytem_counter; ++ return realloc_fptr (ptr, size); ++} +diff --git a/elf/tst-recursive-tlsmodN.c b/elf/tst-recursive-tlsmodN.c +new file mode 100644 +index 0000000000..bb7592aee6 +--- /dev/null ++++ b/elf/tst-recursive-tlsmodN.c +@@ -0,0 +1,28 @@ ++/* Test module with global-dynamic TLS. Used to trigger DTV reallocation. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Compiled with VAR and FUNC set via -D. FUNC requires some ++ relocation against TLS variable VAR. */ ++ ++__thread int VAR; ++ ++int ++FUNC (void) ++{ ++ return VAR; ++} +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 50f58a60e3..656e8a3fa0 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1256,6 +1256,20 @@ extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid, + size_t gen) + attribute_hidden; + ++/* The last TLS module ID that is initially loaded, plus 1. TLS ++ addresses for modules with IDs lower than that can be obtained from ++ the DTV even if its generation is outdated. */ ++extern size_t _dl_tls_initial_modid_limit attribute_hidden attribute_relro; ++ ++/* Compute _dl_tls_initial_modid_limit. To be called after initial ++ relocation. */ ++void _dl_tls_initial_modid_limit_setup (void) attribute_hidden; ++ ++/* Number of threads currently in a TLS update. This is used to ++ detect reentrant __tls_get_addr calls without a per-thread ++ flag. */ ++extern unsigned int _dl_tls_threads_in_update attribute_hidden; ++ + /* Look up the module's TLS block as for __tls_get_addr, + but never touch anything. Return null if it's not allocated yet. */ + extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden; +diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c +index 869023bbba..b3c1e4fcd7 100644 +--- a/sysdeps/x86_64/dl-tls.c ++++ b/sysdeps/x86_64/dl-tls.c +@@ -41,7 +41,10 @@ __tls_get_addr_slow (GET_ADDR_ARGS) + dtv_t *dtv = THREAD_DTV (); + + size_t gen = atomic_load_acquire (&GL(dl_tls_generation)); +- if (__glibc_unlikely (dtv[0].counter != gen)) ++ if (__glibc_unlikely (dtv[0].counter != gen) ++ /* See comment in __tls_get_addr in elf/dl-tls.c. */ ++ && !(_dl_tls_allocate_active () ++ && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)) + return update_get_addr (GET_ADDR_PARAM, gen); + + return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL); diff --git a/glibc.spec b/glibc.spec index 22fd006..b5800e3 100644 --- a/glibc.spec +++ b/glibc.spec @@ -155,7 +155,7 @@ end \ Summary: The GNU libc libraries Name: glibc Version: %{glibcversion} -Release: 114%{?dist} +Release: 115%{?dist} # In general, GPLv2+ is used by programs, LGPLv2+ is used for # libraries. @@ -835,6 +835,8 @@ Patch598: glibc-RHEL-34272-2.patch Patch599: glibc-RHEL-39000-1.patch Patch600: glibc-RHEL-39000-2.patch Patch601: glibc-RHEL-39000-3.patch +Patch602: glibc-RHEL-39992-1.patch +Patch603: glibc-RHEL-39992-2.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2993,6 +2995,10 @@ update_gconv_modules_cache () %endif %changelog +* Wed Jul 03 2024 Patsy Griffin - 2.34-115 +- elf: Avoid some free (NULL) calls in _dl_update_slotinfo +- elf: Support recursive use of dynamic TLS in interposed malloc (RHEL-39992) + * Wed Jun 26 2024 Patsy Griffin - 2.34-114 - Update syscall list for Linux 6.9. (RHEL-39000)