From 87d167a4319a43b06807ec6b48f876d7b4153b5a Mon Sep 17 00:00:00 2001
From: Andrew Lukoshko
Date: Fri, 5 Jul 2024 19:23:02 +0000
Subject: [PATCH] Avoid some free (NULL) calls during __tls_get_addr
 (RHEL-39415)

---
 SOURCES/glibc-RHEL-39415-2.patch | 282 +++++++++++++++++++++++++++++++
 SOURCES/glibc-RHEL-39415.patch   |  28 +++
 SPECS/glibc.spec                 |   7 +-
 3 files changed, 316 insertions(+), 1 deletion(-)
 create mode 100644 SOURCES/glibc-RHEL-39415-2.patch
 create mode 100644 SOURCES/glibc-RHEL-39415.patch

diff --git a/SOURCES/glibc-RHEL-39415-2.patch b/SOURCES/glibc-RHEL-39415-2.patch
new file mode 100644
index 0000000..ef62adf
--- /dev/null
+++ b/SOURCES/glibc-RHEL-39415-2.patch
@@ -0,0 +1,282 @@
+commit 5397764c8c8435324294b885ad42ed9a73a68d1f
+Author: Florian Weimer
+Date:   Mon Jun 24 16:00:43 2024 +0200
+
+    elf: Support recursive use of dynamic TLS in interposed malloc
+
+    It turns out that quite a few applications use bundled mallocs that
+    have been built to use global-dynamic TLS (instead of the recommended
+    initial-exec TLS).  The previous workaround from
+    commit afe42e935b3ee97bac9a7064157587777259c60e ("elf: Avoid some
+    free (NULL) calls in _dl_update_slotinfo") does not fix all
+    encountered cases, unfortunately.
+
+    This change avoids the TLS generation update for recursive use
+    of TLS from a malloc that was called during a TLS update.  This
+    is possible because an interposed malloc has a fixed module ID and
+    TLS slot.  (It cannot be unloaded.)  If an initially-loaded module ID
+    is encountered in __tls_get_addr and the dynamic linker is already
+    in the middle of a TLS update, use the outdated DTV, thus avoiding
+    another call into malloc.  It's still necessary to update the
+    DTV to the most recent generation, to get out of the slow path,
+    which is why the check for recursion is needed.
+
+    The bookkeeping is done using a global counter instead of a
+    per-thread flag because TLS access in the dynamic linker is tricky.
+
+    All this will go away once the dynamic linker stops using malloc
+    for TLS, likely as part of a change that pre-allocates all TLS
+    during pthread_create/dlopen.
+
+    Fixes commit d2123d68275acc0f061e73d5f86ca504e0d5a344 ("elf: Fix slow
+    tls access after dlopen [BZ #19924]").
+
+diff --git a/elf/dl-tls.c b/elf/dl-tls.c
+index 231171b72c21828f..720921b4d3b9df17 100644
+--- a/elf/dl-tls.c
++++ b/elf/dl-tls.c
+@@ -75,6 +75,31 @@
+ /* Default for dl_tls_static_optional.  */
+ #define OPTIONAL_TLS 512
+ 
++/* Used to count the number of threads currently executing dynamic TLS
++   updates.  Used to avoid recursive malloc calls in __tls_get_addr
++   for an interposed malloc that uses global-dynamic TLS (which is not
++   recommended); see _dl_tls_allocate_active checks.  This could be a
++   per-thread flag, but would need TLS access in the dynamic linker.  */
++unsigned int _dl_tls_threads_in_update;
++
++static inline void
++_dl_tls_allocate_begin (void)
++{
++  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, 1);
++}
++
++static inline void
++_dl_tls_allocate_end (void)
++{
++  atomic_fetch_add_relaxed (&_dl_tls_threads_in_update, -1);
++}
++
++static inline bool
++_dl_tls_allocate_active (void)
++{
++  return atomic_load_relaxed (&_dl_tls_threads_in_update) > 0;
++}
++
+ /* Compute the static TLS surplus based on the namespace count and the
+    TLS space that can be used for optimizations.  */
+ static inline int
+@@ -431,12 +456,18 @@ _dl_allocate_tls_storage (void)
+   size += TLS_PRE_TCB_SIZE;
+ #endif
+ 
+-  /* Perform the allocation.  Reserve space for the required alignment
+-     and the pointer to the original allocation.  */
++  /* Reserve space for the required alignment and the pointer to the
++     original allocation.  */
+   size_t alignment = GLRO (dl_tls_static_align);
++
++  /* Perform the allocation.  */
++  _dl_tls_allocate_begin ();
+   void *allocated = malloc (size + alignment + sizeof (void *));
+   if (__glibc_unlikely (allocated == NULL))
+-    return NULL;
++    {
++      _dl_tls_allocate_end ();
++      return NULL;
++    }
+ 
+   /* Perform alignment and allocate the DTV.  */
+ #if TLS_TCB_AT_TP
+@@ -472,6 +503,8 @@ _dl_allocate_tls_storage (void)
+   result = allocate_dtv (result);
+   if (result == NULL)
+     free (allocated);
++
++  _dl_tls_allocate_end ();
+   return result;
+ }
+ 
+@@ -489,6 +522,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
+   size_t newsize = max_modid + DTV_SURPLUS;
+   size_t oldsize = dtv[-1].counter;
+ 
++  _dl_tls_allocate_begin ();
+   if (dtv == GL(dl_initial_dtv))
+     {
+       /* This is the initial dtv that was either statically allocated in
+@@ -508,6 +542,7 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid)
+       if (newp == NULL)
+         oom ();
+     }
++  _dl_tls_allocate_end ();
+ 
+   newp[0].counter = newsize;
+ 
+@@ -682,7 +717,9 @@ allocate_dtv_entry (size_t alignment, size_t size)
+   if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
+     {
+       /* The alignment is supported by malloc.  */
++      _dl_tls_allocate_begin ();
+       void *ptr = malloc (size);
++      _dl_tls_allocate_end ();
+       return (struct dtv_pointer) { ptr, ptr };
+     }
+ 
+@@ -694,7 +731,10 @@ allocate_dtv_entry (size_t alignment, size_t size)
+ 
+   /* Perform the allocation.  This is the pointer we need to free
+      later.  */
++  _dl_tls_allocate_begin ();
+   void *start = malloc (alloc_size);
++  _dl_tls_allocate_end ();
++
+   if (start == NULL)
+     return (struct dtv_pointer) {};
+ 
+@@ -832,7 +872,11 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
+                  free implementation.  Checking here papers over at
+                  least some dynamic TLS usage by interposed mallocs.  */
+               if (dtv[modid].pointer.to_free != NULL)
+-                {
+-                  free (dtv[modid].pointer.to_free);
+-                }
++                {
++                  _dl_tls_allocate_begin ();
++                  free (dtv[modid].pointer.to_free);
++                  _dl_tls_allocate_end ();
++                }
+               dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+               dtv[modid].pointer.to_free = NULL;
+ 
+@@ -959,13 +1003,25 @@ __tls_get_addr (GET_ADDR_ARGS)
+      module, but the global generation counter is easier to check (which
+      must be synchronized up to the generation of the accessed module by
+      user code doing the TLS access so relaxed mo read is enough).  */
+-  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
++  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
+   if (__glibc_unlikely (dtv[0].counter != gen))
+     {
+-      /* Update DTV up to the global generation, see CONCURRENCY NOTES
+-         in _dl_update_slotinfo.  */
+-      gen = atomic_load_acquire (&GL(dl_tls_generation));
+-      return update_get_addr (GET_ADDR_PARAM, gen);
++      if (_dl_tls_allocate_active ()
++          && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
++        /* This is a reentrant __tls_get_addr call, but we can
++           satisfy it because it's an initially-loaded module ID.
++           These TLS slotinfo slots do not change, so the
++           out-of-date generation counter does not matter.  However,
++           if not in a TLS update, still call update_get_addr below,
++           to get off the slow path eventually.  */
++        ;
++      else
++        {
++          /* Update DTV up to the global generation, see CONCURRENCY NOTES
++             in _dl_update_slotinfo.  */
++          gen = atomic_load_acquire (&GL(dl_tls_generation));
++          return update_get_addr (GET_ADDR_PARAM, gen);
++        }
+     }
+ 
+   void *p = dtv[GET_ADDR_MODULE].pointer.val;
+@@ -975,7 +1031,7 @@ __tls_get_addr (GET_ADDR_ARGS)
+ 
+   return (char *) p + GET_ADDR_OFFSET;
+ }
+-#endif
++#endif /* SHARED */
+ 
+ 
+ /* Look up the module's TLS block as for __tls_get_addr,
+@@ -1024,6 +1080,25 @@ _dl_tls_get_addr_soft (struct link_map *l)
+   return data;
+ }
+ 
++size_t _dl_tls_initial_modid_limit;
++
++void
++_dl_tls_initial_modid_limit_setup (void)
++{
++  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
++  size_t idx;
++  for (idx = 0; idx < listp->len; ++idx)
++    {
++      struct link_map *l = listp->slotinfo[idx].map;
++      if (l == NULL
++          /* The object can be unloaded, so its modid can be
++             reassociated.  */
++          || !(l->l_type == lt_executable || l->l_type == lt_library))
++        break;
++    }
++  _dl_tls_initial_modid_limit = idx;
++}
++
+ 
+ void
+ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
+@@ -1056,9 +1131,11 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add)
+          the first slot.  */
+       assert (idx == 0);
+ 
++      _dl_tls_allocate_begin ();
+       listp = (struct dtv_slotinfo_list *)
+         malloc (sizeof (struct dtv_slotinfo_list)
+                 + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
++      _dl_tls_allocate_end ();
+       if (listp == NULL)
+         {
+           /* We ran out of memory while resizing the dtv slotinfo list.  */
+diff --git a/elf/rtld.c b/elf/rtld.c
+index d973c385b312ea16..15a01f3b175ac065 100644
+--- a/elf/rtld.c
++++ b/elf/rtld.c
+@@ -801,6 +801,8 @@ init_tls (size_t naudit)
+     _dl_fatal_printf ("\
+ cannot allocate TLS data structures for initial thread\n");
+ 
++  _dl_tls_initial_modid_limit_setup ();
++
+   /* Store for detection of the special case by __tls_get_addr
+      so it knows not to pass this dtv to the normal realloc.  */
+   GL(dl_initial_dtv) = GET_DTV (tcbp);
+diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
+index 7964e133e4930e88..8bc42ba147ad4c47 100644
+--- a/sysdeps/generic/ldsodefs.h
++++ b/sysdeps/generic/ldsodefs.h
+@@ -1308,6 +1308,20 @@ extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid,
+                                              size_t gen)
+      attribute_hidden;
+ 
++/* The last TLS module ID that is initially loaded, plus 1.  TLS
++   addresses for modules with IDs lower than that can be obtained from
++   the DTV even if its generation is outdated.  */
++extern size_t _dl_tls_initial_modid_limit attribute_hidden attribute_relro;
++
++/* Compute _dl_tls_initial_modid_limit.  To be called after initial
++   relocation.  */
++void _dl_tls_initial_modid_limit_setup (void) attribute_hidden;
++
++/* Number of threads currently in a TLS update.  This is used to
++   detect reentrant __tls_get_addr calls without a per-thread
++   flag.  */
++extern unsigned int _dl_tls_threads_in_update attribute_hidden;
++
+ /* Look up the module's TLS block as for __tls_get_addr,
+    but never touch anything.  Return null if it's not allocated yet.  */
+ extern void *_dl_tls_get_addr_soft (struct link_map *l) attribute_hidden;
+diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
+index 4ded8dd6b94edc81..7abfb18413b1d06c 100644
+--- a/sysdeps/x86_64/dl-tls.c
++++ b/sysdeps/x86_64/dl-tls.c
+@@ -41,7 +41,10 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
+   dtv_t *dtv = THREAD_DTV ();
+ 
+   size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
+-  if (__glibc_unlikely (dtv[0].counter != gen))
++  if (__glibc_unlikely (dtv[0].counter != gen)
++      /* See comment in __tls_get_addr in elf/dl-tls.c.  */
++      && !(_dl_tls_allocate_active ()
++           && GET_ADDR_MODULE < _dl_tls_initial_modid_limit))
+     return update_get_addr (GET_ADDR_PARAM, gen);
+ 
+   return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
diff --git a/SOURCES/glibc-RHEL-39415.patch b/SOURCES/glibc-RHEL-39415.patch
new file mode 100644
index 0000000..4d5b841
--- /dev/null
+++ b/SOURCES/glibc-RHEL-39415.patch
@@ -0,0 +1,28 @@
+Try to cover for incorrect dynamic TLS usage in some malloc
+implementations.
+
+Upstream discussion:
+
+  New TLS usage in libgcc_s.so.1, compatibility impact
+
+
+diff --git a/elf/dl-tls.c b/elf/dl-tls.c
+index b9dc56e81a3b43db..231171b72c21828f 100644
+--- a/elf/dl-tls.c
++++ b/elf/dl-tls.c
+@@ -825,7 +825,14 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
+              dtv entry free it.  Note: this is not AS-safe.  */
+           /* XXX Ideally we will at some point create a memory
+              pool.  */
+-          free (dtv[modid].pointer.to_free);
++          /* Avoid calling free on a null pointer.  Some mallocs
++             incorrectly use dynamic TLS, and depending on how the
++             free function was compiled, it could call
++             __tls_get_addr before the null pointer check in the
++             free implementation.  Checking here papers over at
++             least some dynamic TLS usage by interposed mallocs.  */
++          if (dtv[modid].pointer.to_free != NULL)
++            free (dtv[modid].pointer.to_free);
+           dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+           dtv[modid].pointer.to_free = NULL;
+
diff --git a/SPECS/glibc.spec b/SPECS/glibc.spec
index 7677e8d..84bd9e8 100644
--- a/SPECS/glibc.spec
+++ b/SPECS/glibc.spec
@@ -155,7 +155,7 @@ end \
 Summary: The GNU libc libraries
 Name: glibc
 Version: %{glibcversion}
-Release: 100%{?dist}.2
+Release: 100%{?dist}.2.alma.1
 
 # In general, GPLv2+ is used by programs, LGPLv2+ is used for
 # libraries.
@@ -813,6 +813,8 @@ Patch576: glibc-RHEL-34318-1.patch
 Patch577: glibc-RHEL-34318-2.patch
 Patch578: glibc-RHEL-34318-3.patch
 Patch579: glibc-RHEL-34318-4.patch
+Patch580: glibc-RHEL-39415.patch
+Patch581: glibc-RHEL-39415-2.patch
 
 ##############################################################################
 # Continued list of core "glibc" package information:
@@ -2971,6 +2973,9 @@ update_gconv_modules_cache ()
 %endif
 
 %changelog
+* Fri Jul 05 2024 Andrew Lukoshko - 2.34-100.2.alma.1
+- Avoid some free (NULL) calls during __tls_get_addr (RHEL-39415)
+
 * Mon Apr 29 2024 Florian Weimer - 2.34-100.2
 - CVE-2024-33599: nscd: buffer overflow in netgroup cache (RHEL-34318)
 - CVE-2024-33600: nscd: null pointer dereferences in netgroup cache
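
As a standalone illustration of the mechanism the patches describe, the
sketch below models the recursion guard added by glibc-RHEL-39415-2.patch.
It is not glibc code: the function names only echo the patch, and the
generation values and the modid limit of 4 are invented for the example.
Only the shape follows the patch: a global counter brackets allocations
done during a TLS update, and a reentrant lookup for an initially-loaded
module ID is served from the stale DTV instead of calling back into malloc.

/* recursion-guard-sketch.c: illustrative model, not glibc code.
   Build: cc -std=c11 -o sketch recursion-guard-sketch.c */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Global counter, standing in for _dl_tls_threads_in_update.  */
static atomic_uint threads_in_update;

/* Standing in for _dl_tls_initial_modid_limit; the value 4 is an
   assumption meaning "module IDs 1..3 were initially loaded".  */
static const size_t initial_modid_limit = 4;

static void allocate_begin (void)
{
  atomic_fetch_add_explicit (&threads_in_update, 1, memory_order_relaxed);
}

static void allocate_end (void)
{
  atomic_fetch_sub_explicit (&threads_in_update, 1, memory_order_relaxed);
}

static bool allocate_active (void)
{
  return atomic_load_explicit (&threads_in_update, memory_order_relaxed) > 0;
}

/* Stands in for update_get_addr: brings the DTV up to date and calls
   malloc, which an interposed global-dynamic-TLS malloc may turn into
   a reentrant tls_get_addr call.  */
static void slow_path (size_t modid)
{
  allocate_begin ();
  free (malloc (64));   /* an interposed malloc could reenter here */
  allocate_end ();
  printf ("modid %zu: slow path (DTV updated, malloc called)\n", modid);
}

/* Models the fast-path decision in __tls_get_addr.  */
static void get_addr (size_t modid, size_t dtv_gen, size_t global_gen)
{
  if (dtv_gen != global_gen)
    {
      if (allocate_active () && modid < initial_modid_limit)
        /* Reentrant call for an initially-loaded module: its slot
           cannot change, so the stale DTV is safe and malloc is
           avoided.  */
        printf ("modid %zu: reentrant, stale DTV used, no malloc\n", modid);
      else
        slow_path (modid);
    }
  else
    printf ("modid %zu: generation current, fast path\n", modid);
}

int main (void)
{
  get_addr (2, 1, 2);   /* outdated DTV, not in an update: slow path */
  allocate_begin ();    /* simulate being inside a TLS update */
  get_addr (2, 1, 2);   /* initially-loaded modid: guard takes effect */
  get_addr (7, 1, 2);   /* dlopen'ed modid: must still take slow path */
  allocate_end ();
  return 0;
}

Using a process-global counter rather than a per-thread flag mirrors the
rationale in the commit message above: the guard itself must work without
touching TLS from inside the dynamic linker.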