diff --git a/glibc-upstream-2.34-133.patch b/glibc-upstream-2.34-133.patch new file mode 100644 index 0000000..a0c4478 --- /dev/null +++ b/glibc-upstream-2.34-133.patch @@ -0,0 +1,178 @@ +commit 738ee53f0ce5e39b9b7a6777f5d3057afbaac498 +Author: John David Anglin +Date: Tue Mar 15 23:12:37 2022 +0000 + + hppa: Implement swapcontext in assembler (bug 28960) + + When swapcontext.c is compiled without -g, the following error occurs: + Error: CFI instruction used without previous .cfi_startproc + + Fix by converting swapcontext routine to assembler. + +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.S b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +new file mode 100644 +index 0000000000000000..94b164dc6375563e +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +@@ -0,0 +1,72 @@ ++/* Swap to new context. ++ Copyright (C) 2008-2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include "ucontext_i.h" ++ ++ .text ++ENTRY(__swapcontext) ++ ++ /* Copy rp to ret0 (r28). */ ++ copy %rp,%ret0 ++ ++ /* Create a frame. */ ++ ldo 64(%sp),%sp ++ .cfi_def_cfa_offset -64 ++ ++ /* Save the current machine context to oucp. */ ++ bl __getcontext,%rp ++ ++ /* Copy oucp to register ret1 (r29). __getcontext saves and ++ restores it on a normal return. It is restored from oR29 ++ on reactivation. */ ++ copy %r26,%ret1 ++ ++ /* Pop frame. */ ++ ldo -64(%sp),%sp ++ .cfi_def_cfa_offset 0 ++ ++ /* Load return pointer from oR28. */ ++ ldw oR28(%ret1),%rp ++ ++ /* Return if error. */ ++ or,= %r0,%ret0,%r0 ++ bv,n %r0(%rp) ++ ++ /* Load sc_sar flag. */ ++ ldb oSAR(%ret1),%r20 ++ ++ /* Return if oucp context has been reactivated. */ ++ or,= %r0,%r20,%r0 ++ bv,n %r0(%rp) ++ ++ /* Mark sc_sar flag. */ ++ ldi 1,%r20 ++ stb %r20,oSAR(%ret1) ++ ++ /* Activate the machine context in ucp. */ ++ bl __setcontext,%rp ++ ldw oR25(%ret1),%r26 ++ ++ /* Load return pointer. */ ++ ldw oR28(%ret1),%rp ++ bv,n %r0(%rp) ++ ++PSEUDO_END(__swapcontext) ++ ++weak_alias (__swapcontext, swapcontext) +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.c b/sysdeps/unix/sysv/linux/hppa/swapcontext.c +deleted file mode 100644 +index 1664f68c7b9982e8..0000000000000000 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.c ++++ /dev/null +@@ -1,83 +0,0 @@ +-/* Swap to new context. +- Copyright (C) 2008-2021 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Helge Deller , 2008. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +-#include "ucontext_i.h" +- +-extern int __getcontext (ucontext_t *ucp); +-extern int __setcontext (const ucontext_t *ucp); +- +-int +-__swapcontext (ucontext_t *oucp, const ucontext_t *ucp) +-{ +- /* Save rp for debugger. */ +- asm ("stw %rp,-20(%sp)"); +- asm (".cfi_offset 2, -20"); +- +- /* Copy rp to ret0 (r28). */ +- asm ("copy %rp,%ret0"); +- +- /* Create a frame. */ +- asm ("ldo 64(%sp),%sp"); +- asm (".cfi_def_cfa_offset -64"); +- +- /* Save the current machine context to oucp. */ +- asm ("bl __getcontext,%rp"); +- +- /* Copy oucp to register ret1 (r29). __getcontext saves and restores it +- on a normal return. It is restored from oR29 on reactivation. */ +- asm ("copy %r26,%ret1"); +- +- /* Pop frame. */ +- asm ("ldo -64(%sp),%sp"); +- asm (".cfi_def_cfa_offset 0"); +- +- /* Load return pointer from oR28. */ +- asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); +- +- /* Return if error. */ +- asm ("or,= %r0,%ret0,%r0"); +- asm ("bv,n %r0(%rp)"); +- +- /* Load sc_sar flag. */ +- asm ("ldb %0(%%ret1),%%r20" : : "i" (oSAR)); +- +- /* Return if oucp context has been reactivated. */ +- asm ("or,= %r0,%r20,%r0"); +- asm ("bv,n %r0(%rp)"); +- +- /* Mark sc_sar flag. */ +- asm ("1: ldi 1,%r20"); +- asm ("stb %%r20,%0(%%ret1)" : : "i" (oSAR)); +- +- /* Activate the machine context in ucp. */ +- asm ("bl __setcontext,%rp"); +- asm ("ldw %0(%%ret1),%%r26" : : "i" (oR25)); +- +- /* Load return pointer. */ +- asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28)); +- +- /* A successful call to setcontext does not return. */ +- asm ("bv,n %r0(%rp)"); +- +- /* Make gcc happy. */ +- return 0; +-} +- +-weak_alias (__swapcontext, swapcontext) diff --git a/glibc-upstream-2.34-134.patch b/glibc-upstream-2.34-134.patch new file mode 100644 index 0000000..79d45ee --- /dev/null +++ b/glibc-upstream-2.34-134.patch @@ -0,0 +1,20 @@ +commit d53b9cc391c72a1011ea8fe7a9f70dc5060a0db2 +Author: John David Anglin +Date: Tue Mar 15 23:04:39 2022 +0000 + + hppa: Use END instead of PSEUDO_END in swapcontext.S + + (cherry picked from commit 7a5c440102d4ec7fafd9bbd98eca9bd90ecaaafd) + +diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.S b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +index 94b164dc6375563e..fbc22586d1195a0d 100644 +--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.S ++++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.S +@@ -67,6 +67,6 @@ ENTRY(__swapcontext) + ldw oR28(%ret1),%rp + bv,n %r0(%rp) + +-PSEUDO_END(__swapcontext) ++END(__swapcontext) + + weak_alias (__swapcontext, swapcontext) diff --git a/glibc-upstream-2.34-135.patch b/glibc-upstream-2.34-135.patch new file mode 100644 index 0000000..e5b3840 --- /dev/null +++ b/glibc-upstream-2.34-135.patch @@ -0,0 +1,35 @@ +commit 4b5b8a1cdf39bed02b8b973717796eccde455ed6 +Author: Fangrui Song +Date: Mon Sep 27 10:12:50 2021 -0700 + + powerpc: Delete unneeded ELF_MACHINE_BEFORE_RTLD_RELOC + + Reviewed-by: Raphael M Zinsly + (cherry picked from commit 8e2557a2b85b2eb0ed50a9016a4ffc6b859b97e6) + +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index ced3a7b659cfcff1..b93cf486b6cda5fd 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -109,8 +109,6 @@ elf_machine_load_address (void) + return runtime_dynamic - elf_machine_dynamic (); + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ +- + /* The PLT uses Elf32_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index b90f407119efd431..b3f3352bcf5a52b0 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -116,8 +116,6 @@ elf_machine_dynamic (void) + return runtime_dynamic - elf_machine_load_address() ; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */ +- + /* The PLT uses Elf64_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela + diff --git a/glibc-upstream-2.34-136.patch b/glibc-upstream-2.34-136.patch new file mode 100644 index 0000000..0906c1c --- /dev/null +++ b/glibc-upstream-2.34-136.patch @@ -0,0 +1,2020 @@ +commit b19de59d620b3a9e6adf937f322f4281b67fc712 +Author: Fangrui Song +Date: Thu Oct 7 11:55:02 2021 -0700 + + elf: Avoid nested functions in the loader [BZ #27220] + + dynamic-link.h is included more than once in some elf/ files (rtld.c, + dl-conflict.c, dl-reloc.c, dl-reloc-static-pie.c) and uses GCC nested + functions. This harms readability and the nested functions usage + is the biggest obstacle prevents Clang build (Clang doesn't support GCC + nested functions). + + The key idea for unnesting is to add extra parameters (struct link_map + *and struct r_scope_elm *[]) to RESOLVE_MAP, + ELF_MACHINE_BEFORE_RTLD_RELOC, ELF_DYNAMIC_RELOCATE, elf_machine_rel[a], + elf_machine_lazy_rel, and elf_machine_runtime_setup. (This is inspired + by Stan Shebs' ppc64/x86-64 implementation in the + google/grte/v5-2.27/master which uses mixed extra parameters and static + variables.) + + Future simplification: + * If mips elf_machine_runtime_setup no longer needs RESOLVE_GOTSYM, + elf_machine_runtime_setup can drop the `scope` parameter. + * If TLSDESC no longer need to be in elf_machine_lazy_rel, + elf_machine_lazy_rel can drop the `scope` parameter. + + Tested on aarch64, i386, x86-64, powerpc64le, powerpc64, powerpc32, + sparc64, sparcv9, s390x, s390, hppa, ia64, armhf, alpha, and mips64. + In addition, tested build-many-glibcs.py with {arc,csky,microblaze,nios2}-linux-gnu + and riscv64-linux-gnu-rv64imafdc-lp64d. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 490e6c62aa31a8aa5c4a059f6e646ede121edf0a) + +diff --git a/elf/dl-conflict.c b/elf/dl-conflict.c +index 31a2f90770ce2a55..5c8e51d19ae095d6 100644 +--- a/elf/dl-conflict.c ++++ b/elf/dl-conflict.c +@@ -27,20 +27,12 @@ + #include + #include "dynamic-link.h" + +-void +-_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, +- ElfW(Rela) *conflictend) +-{ +-#if ! ELF_MACHINE_NO_RELA +- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC)) +- _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name)); +- +- { +- /* Do the conflict relocation of the object and library GOT and other +- data. */ ++/* Used at loading time solely for prelink executable. It is not called ++ concurrently so it is be safe to defined as static. */ ++static struct link_map *resolve_conflict_map __attribute__ ((__unused__)); + + /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ +-#define RESOLVE_MAP(ref, version, flags) (*ref = NULL, NULL) ++#define RESOLVE_MAP(map, scope, ref, version, flags) (*ref = NULL, NULL) + #define RESOLVE(ref, version, flags) (*ref = NULL, 0) + #define RESOLVE_CONFLICT_FIND_MAP(map, r_offset) \ + do { \ +@@ -51,12 +43,23 @@ _dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, + (map) = resolve_conflict_map; \ + } while (0) + ++#include "dynamic-link.h" ++ ++void ++_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, ++ ElfW(Rela) *conflictend) ++{ ++#if ! ELF_MACHINE_NO_RELA ++ if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC)) ++ _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name)); ++ ++ { ++ /* Do the conflict relocation of the object and library GOT and other ++ data. */ ++ + /* Prelinking makes no sense for anything but the main namespace. */ + assert (l->l_ns == LM_ID_BASE); +- struct link_map *resolve_conflict_map __attribute__ ((__unused__)) +- = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- +-#include "dynamic-link.h" ++ resolve_conflict_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded; + + /* Override these, defined in dynamic-link.h. */ + #undef CHECK_STATIC_TLS +@@ -67,8 +70,8 @@ _dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict, + GL(dl_num_cache_relocations) += conflictend - conflict; + + for (; conflict < conflictend; ++conflict) +- elf_machine_rela (l, conflict, NULL, NULL, (void *) conflict->r_offset, +- 0); ++ elf_machine_rela (l, NULL, conflict, NULL, NULL, ++ (void *) conflict->r_offset, 0); + } + #endif + } +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index 2fb02d727654c87d..a52ba8aeb8b573cb 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -19,8 +19,14 @@ + #if ENABLE_STATIC_PIE + /* Mark symbols hidden in static PIE for early self relocation to work. */ + # pragma GCC visibility push(hidden) ++#include + #include + #include ++ ++#include ++ ++#define STATIC_PIE_BOOTSTRAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + + /* Relocate static executable with PIE. */ +@@ -30,11 +36,6 @@ _dl_relocate_static_pie (void) + { + struct link_map *main_map = _dl_get_dl_main_map (); + +-# define STATIC_PIE_BOOTSTRAP +-# define BOOTSTRAP_MAP (main_map) +-# define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP +-# include "dynamic-link.h" +- + /* Figure out the run-time load address of static PIE. */ + main_map->l_addr = elf_machine_load_address (); + +@@ -53,12 +54,12 @@ _dl_relocate_static_pie (void) + elf_get_dynamic_info (main_map); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC +- ELF_MACHINE_BEFORE_RTLD_RELOC (main_map->l_info); ++ ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); + # endif + + /* Relocate ourselves so we can do normal function calls and + data access using the global offset table. */ +- ELF_DYNAMIC_RELOCATE (main_map, 0, 0, 0); ++ ELF_DYNAMIC_RELOCATE (main_map, NULL, 0, 0, 0); + main_map->l_relocated = 1; + + /* Initialize _r_debug. */ +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index e13a672ade6d7a28..3447de7f3536cd70 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -162,6 +162,32 @@ _dl_nothread_init_static_tls (struct link_map *map) + } + #endif /* !THREAD_GSCOPE_IN_TCB */ + ++/* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ ++#define RESOLVE_MAP(l, scope, ref, version, r_type) \ ++ ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \ ++ && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \ ++ ? ((__glibc_unlikely ((*ref) == l->l_lookup_cache.sym) \ ++ && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \ ++ ? (bump_num_cache_relocations (), \ ++ (*ref) = l->l_lookup_cache.ret, \ ++ l->l_lookup_cache.value) \ ++ : ({ lookup_t _lr; \ ++ int _tc = elf_machine_type_class (r_type); \ ++ l->l_lookup_cache.type_class = _tc; \ ++ l->l_lookup_cache.sym = (*ref); \ ++ const struct r_found_version *v = NULL; \ ++ if ((version) != NULL && (version)->hash != 0) \ ++ v = (version); \ ++ _lr = _dl_lookup_symbol_x ((const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name, \ ++ l, (ref), scope, v, _tc, \ ++ DL_LOOKUP_ADD_DEPENDENCY \ ++ | DL_LOOKUP_FOR_RELOCATE, NULL); \ ++ l->l_lookup_cache.ret = (*ref); \ ++ l->l_lookup_cache.value = _lr; })) \ ++ : l) ++ ++#include "dynamic-link.h" ++ + void + _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int reloc_mode, int consider_profiling) +@@ -243,36 +269,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + { + /* Do the actual relocation of the object's GOT and other data. */ + +- /* String table object symbols. */ +- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]); +- +- /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */ +-#define RESOLVE_MAP(ref, version, r_type) \ +- ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \ +- && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \ +- ? ((__builtin_expect ((*ref) == l->l_lookup_cache.sym, 0) \ +- && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \ +- ? (bump_num_cache_relocations (), \ +- (*ref) = l->l_lookup_cache.ret, \ +- l->l_lookup_cache.value) \ +- : ({ lookup_t _lr; \ +- int _tc = elf_machine_type_class (r_type); \ +- l->l_lookup_cache.type_class = _tc; \ +- l->l_lookup_cache.sym = (*ref); \ +- const struct r_found_version *v = NULL; \ +- if ((version) != NULL && (version)->hash != 0) \ +- v = (version); \ +- _lr = _dl_lookup_symbol_x (strtab + (*ref)->st_name, l, (ref), \ +- scope, v, _tc, \ +- DL_LOOKUP_ADD_DEPENDENCY \ +- | DL_LOOKUP_FOR_RELOCATE, NULL); \ +- l->l_lookup_cache.ret = (*ref); \ +- l->l_lookup_cache.value = _lr; })) \ +- : l) +- +-#include "dynamic-link.h" +- +- ELF_DYNAMIC_RELOCATE (l, lazy, consider_profiling, skip_ifunc); ++ ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); + + #ifndef PROF + if (__glibc_unlikely (consider_profiling) +diff --git a/elf/do-rel.h b/elf/do-rel.h +index 321ac2b359c1028c..f441b749190c2641 100644 +--- a/elf/do-rel.h ++++ b/elf/do-rel.h +@@ -37,8 +37,8 @@ + relocations; they should be set up to call _dl_runtime_resolve, rather + than fully resolved now. */ + +-auto inline void __attribute__ ((always_inline)) +-elf_dynamic_do_Rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) reladdr, ElfW(Addr) relsize, + __typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative, + int lazy, int skip_ifunc) +@@ -68,13 +68,13 @@ elf_dynamic_do_Rel (struct link_map *map, + } + else + # endif +- elf_machine_lazy_rel (map, l_addr, r, skip_ifunc); ++ elf_machine_lazy_rel (map, scope, l_addr, r, skip_ifunc); + + # ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) +- elf_machine_lazy_rel (map, l_addr, r2, skip_ifunc); ++ elf_machine_lazy_rel (map, scope, l_addr, r2, skip_ifunc); + # endif + } + else +@@ -134,7 +134,7 @@ elf_dynamic_do_Rel (struct link_map *map, + #endif + + ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r->r_offset), skip_ifunc); + } +@@ -146,7 +146,7 @@ elf_dynamic_do_Rel (struct link_map *map, + { + ElfW(Half) ndx + = version[ELFW(R_SYM) (r2->r_info)] & 0x7fff; +- elf_machine_rel (map, r2, ++ elf_machine_rel (map, scope, r2, + &symtab[ELFW(R_SYM) (r2->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r2->r_offset), +@@ -167,14 +167,14 @@ elf_dynamic_do_Rel (struct link_map *map, + } + else + # endif +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, + (void *) (l_addr + r->r_offset), skip_ifunc); + + # ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) + for (; r2 <= end2; ++r2) + if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE) +- elf_machine_rel (map, r2, &symtab[ELFW(R_SYM) (r2->r_info)], ++ elf_machine_rel (map, scope, r2, &symtab[ELFW(R_SYM) (r2->r_info)], + NULL, (void *) (l_addr + r2->r_offset), + skip_ifunc); + # endif +diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h +index 3eb24ba3a6cee40b..7cc30211649d3820 100644 +--- a/elf/dynamic-link.h ++++ b/elf/dynamic-link.h +@@ -59,31 +59,33 @@ int _dl_try_allocate_static_tls (struct link_map *map, bool optional) + copying memory, breaking the very code written to handle the + unaligned cases. */ + # if ! ELF_MACHINE_NO_REL +-auto inline void __attribute__((always_inline)) +-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++static inline void __attribute__((always_inline)) ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rel) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc); +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) + elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + void *const reloc_addr); + # endif + # if ! ELF_MACHINE_NO_RELA +-auto inline void __attribute__((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr, int skip_ifunc); +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *const reloc_addr, ++ int skip_ifunc); ++static inline void __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr); + # endif + # if ELF_MACHINE_NO_RELA || defined ELF_MACHINE_PLT_REL +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + int skip_ifunc); + # else +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc); + # endif +@@ -114,7 +116,7 @@ elf_machine_lazy_rel (struct link_map *map, + consumes precisely the very end of the DT_REL*, or DT_JMPREL and DT_REL* + are completely separate and there is a gap between them. */ + +-# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, do_lazy, skip_ifunc, test_rel) \ ++# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, scope, do_lazy, skip_ifunc, test_rel) \ + do { \ + struct { ElfW(Addr) start, size; \ + __typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative; int lazy; } \ +@@ -152,18 +154,18 @@ elf_machine_lazy_rel (struct link_map *map, + } \ + \ + if (ELF_DURING_STARTUP) \ +- elf_dynamic_do_##reloc ((map), ranges[0].start, ranges[0].size, \ +- ranges[0].nrelative, 0, skip_ifunc); \ ++ elf_dynamic_do_##reloc ((map), scope, ranges[0].start, ranges[0].size, \ ++ ranges[0].nrelative, 0, skip_ifunc); \ + else \ + { \ + int ranges_index; \ + for (ranges_index = 0; ranges_index < 2; ++ranges_index) \ +- elf_dynamic_do_##reloc ((map), \ ++ elf_dynamic_do_##reloc ((map), scope, \ + ranges[ranges_index].start, \ + ranges[ranges_index].size, \ + ranges[ranges_index].nrelative, \ + ranges[ranges_index].lazy, \ +- skip_ifunc); \ ++ skip_ifunc); \ + } \ + } while (0) + +@@ -175,29 +177,29 @@ elf_machine_lazy_rel (struct link_map *map, + + # if ! ELF_MACHINE_NO_REL + # include "do-rel.h" +-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) \ +- _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, lazy, skip_ifunc, _ELF_CHECK_REL) ++# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) \ ++ _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL) + # else +-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) /* Nothing to do. */ ++# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) /* Nothing to do. */ + # endif + + # if ! ELF_MACHINE_NO_RELA + # define DO_RELA + # include "do-rel.h" +-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) \ +- _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, lazy, skip_ifunc, _ELF_CHECK_REL) ++# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) \ ++ _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL) + # else +-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) /* Nothing to do. */ ++# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) /* Nothing to do. */ + # endif + + /* This can't just be an inline function because GCC is too dumb + to inline functions containing inlines themselves. */ +-# define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile, skip_ifunc) \ ++# define ELF_DYNAMIC_RELOCATE(map, scope, lazy, consider_profile, skip_ifunc) \ + do { \ +- int edr_lazy = elf_machine_runtime_setup ((map), (lazy), \ ++ int edr_lazy = elf_machine_runtime_setup ((map), (scope), (lazy), \ + (consider_profile)); \ +- ELF_DYNAMIC_DO_REL ((map), edr_lazy, skip_ifunc); \ +- ELF_DYNAMIC_DO_RELA ((map), edr_lazy, skip_ifunc); \ ++ ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc); \ ++ ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc); \ + } while (0) + + #endif +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index 4aa2058abf6443c9..15c316b38c05a90c 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -16,18 +16,15 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This file is included multiple times and therefore lacks a header +- file inclusion guard. */ ++/* Populate dynamic tags in l_info. */ ++ ++#ifndef _GET_DYNAMIC_INFO_H ++#define _GET_DYNAMIC_INFO_H + + #include + #include + +-#ifndef RESOLVE_MAP +-static +-#else +-auto +-#endif +-inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_get_dynamic_info (struct link_map *l) + { + #if __ELF_NATIVE_CLASS == 32 +@@ -165,3 +162,5 @@ elf_get_dynamic_info (struct link_map *l) + info[DT_RPATH] = NULL; + #endif + } ++ ++#endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 84eac9a8df7125a6..ee45657aeac14f3c 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -502,13 +502,9 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + return start_addr; + } + +-static ElfW(Addr) __attribute_used__ +-_dl_start (void *arg) +-{ + #ifdef DONT_USE_BOOTSTRAP_MAP + # define bootstrap_map GL(dl_rtld_map) + #else +- struct dl_start_final_info info; + # define bootstrap_map info.l + #endif + +@@ -517,13 +513,16 @@ _dl_start (void *arg) + Since ld.so must not have any undefined symbols the result + is trivial: always the map of ld.so itself. */ + #define RTLD_BOOTSTRAP +-#define BOOTSTRAP_MAP (&bootstrap_map) +-#define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + ++static ElfW(Addr) __attribute_used__ ++_dl_start (void *arg) ++{ + #ifdef DONT_USE_BOOTSTRAP_MAP + rtld_timer_start (&start_time); + #else ++ struct dl_start_final_info info; + rtld_timer_start (&info.start_time); + #endif + +@@ -557,7 +556,7 @@ _dl_start (void *arg) + #endif + + #ifdef ELF_MACHINE_BEFORE_RTLD_RELOC +- ELF_MACHINE_BEFORE_RTLD_RELOC (bootstrap_map.l_info); ++ ELF_MACHINE_BEFORE_RTLD_RELOC (&bootstrap_map, bootstrap_map.l_info); + #endif + + if (bootstrap_map.l_addr || ! bootstrap_map.l_info[VALIDX(DT_GNU_PRELINKED)]) +@@ -565,7 +564,7 @@ _dl_start (void *arg) + /* Relocate ourselves so we can do normal function calls and + data access using the global offset table. */ + +- ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0, 0); ++ ELF_DYNAMIC_RELOCATE (&bootstrap_map, NULL, 0, 0, 0); + } + bootstrap_map.l_relocated = 1; + +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index d29d827ab32a78ee..34c0790b893a529b 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -65,7 +65,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (l->l_info[DT_JMPREL] && lazy) + { +@@ -243,10 +244,11 @@ elf_machine_plt_value (struct link_map *map, + + #ifdef RESOLVE_MAP + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = reloc_addr_arg; +@@ -259,7 +261,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const ElfW(Sym) *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -383,9 +386,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, + *reloc_addr = l_addr + reloc->r_addend; + } + +-inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, + int skip_ifunc) +@@ -412,7 +415,7 @@ elf_machine_lazy_rel (struct link_map *map, + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } +- elf_machine_rela (map, reloc, sym, version, reloc_addr, ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, + skip_ifunc); + return; + } +@@ -439,7 +442,8 @@ elf_machine_lazy_rel (struct link_map *map, + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ +- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, ++ skip_ifunc); + } + else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE))) + { +diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h +index 2cd2213d9ab25287..66e1db524bb378f6 100644 +--- a/sysdeps/alpha/dl-machine.h ++++ b/sysdeps/alpha/dl-machine.h +@@ -70,7 +70,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern char _dl_runtime_resolve_new[] attribute_hidden; + extern char _dl_runtime_profile_new[] attribute_hidden; +@@ -361,9 +362,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -411,7 +412,8 @@ elf_machine_rela (struct link_map *map, + return; + else + { +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf64_Addr sym_value; + Elf64_Addr sym_raw_value; + +@@ -489,7 +491,7 @@ elf_machine_rela (struct link_map *map, + can be skipped. */ + #define ELF_MACHINE_REL_RELATIVE 1 + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -506,9 +508,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + memcpy (reloc_addr_arg, &reloc_addr_val, 8); + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/arc/dl-machine.h b/sysdeps/arc/dl-machine.h +index e6ce7f0ff6d9ac34..4b64ffec256b7f3b 100644 +--- a/sysdeps/arc/dl-machine.h ++++ b/sysdeps/arc/dl-machine.h +@@ -122,7 +122,8 @@ elf_machine_load_address (void) + + static inline int + __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (void); + +@@ -228,10 +229,11 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, + + #ifdef RESOLVE_MAP + +-inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + ElfW(Addr) r_info = reloc->r_info; +@@ -245,7 +247,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const ElfW(Sym) *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -326,8 +329,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + + inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, +- const ElfW(Rela) *reloc, int skip_ifunc) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) l_addr, const ElfW(Rela) *reloc, ++ int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index ff5e09e207f7986b..7e6761bbe87540d5 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -84,7 +84,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -303,7 +304,7 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + + #ifdef RESOLVE_MAP + /* Handle a PC24 reloc, including the out-of-range case. */ +-auto void ++static void + relocate_pc24 (struct link_map *map, Elf32_Addr value, + Elf32_Addr *const reloc_addr, Elf32_Sword addend) + { +@@ -357,10 +358,11 @@ relocate_pc24 (struct link_map *map, Elf32_Addr value, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rel *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -391,7 +393,8 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + #endif + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -535,10 +538,11 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -553,7 +557,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + # ifndef RESOLVE_CONFLICT_FIND_MAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -628,7 +632,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + # endif + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + void *const reloc_addr_arg) +@@ -638,7 +642,7 @@ elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -648,9 +652,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + } + # endif + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rel *reloc, + int skip_ifunc) + { +@@ -680,7 +684,7 @@ elf_machine_lazy_rel (struct link_map *map, + + /* Always initialize TLS descriptors completely, because lazy + initialization requires synchronization at every TLS access. */ +- elf_machine_rel (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rel (map, scope, reloc, sym, version, reloc_addr, skip_ifunc); + } + else + _dl_reloc_bad_type (map, r_type, 1); +diff --git a/sysdeps/csky/dl-machine.h b/sysdeps/csky/dl-machine.h +index b08f06d74ca6f8d1..ec22f875772b1291 100644 +--- a/sysdeps/csky/dl-machine.h ++++ b/sysdeps/csky/dl-machine.h +@@ -58,7 +58,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -215,9 +216,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -230,7 +232,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + opcode16_addr = (unsigned short *)reloc_addr; + +@@ -331,7 +334,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -339,8 +342,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h +index 24f0f47d8f1e25cd..088931f67065250c 100644 +--- a/sysdeps/hppa/dl-machine.h ++++ b/sysdeps/hppa/dl-machine.h +@@ -70,8 +70,8 @@ __hppa_init_bootstrap_fdesc_table (struct link_map *map) + map->l_mach.fptr_table = boot_table; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ +- __hppa_init_bootstrap_fdesc_table (BOOTSTRAP_MAP); \ ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \ ++ __hppa_init_bootstrap_fdesc_table (map); \ + _dl_fptr_init(); + + /* Return nonzero iff ELF header is compatible with the running host. */ +@@ -182,7 +182,8 @@ elf_machine_main_map (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got = NULL; + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type, r_sym; +@@ -564,8 +565,8 @@ dl_platform_init (void) + ( (((as14) & 0x1fff) << 1) \ + | (((as14) & 0x2000) >> 13)) + +-auto void __attribute__((always_inline)) +-elf_machine_rela (struct link_map *map, ++static void __attribute__((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf32_Rela *reloc, + const Elf32_Sym *sym, + const struct r_found_version *version, +@@ -594,11 +595,9 @@ elf_machine_rela (struct link_map *map, + zeros, and an all zero Elf32_Sym has a binding of STB_LOCAL.) + See RESOLVE_MAP definition in elf/dl-reloc.c */ + # ifdef RTLD_BOOTSTRAP +- /* RESOLVE_MAP in rtld.c doesn't have the local sym test. */ +- sym_map = (ELF32_ST_BIND (sym->st_info) != STB_LOCAL +- ? RESOLVE_MAP (&sym, version, r_type) : map); ++ sym_map = map; + # else +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + # endif + + if (sym_map) +@@ -756,7 +755,7 @@ elf_machine_rela (struct link_map *map, + + /* hppa doesn't have an R_PARISC_RELATIVE reloc, but uses relocs with + ELF32_R_SYM (info) == 0 for a similar purpose. */ +-auto void __attribute__((always_inline)) ++static void __attribute__((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, + const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -809,8 +808,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, + *reloc_addr = value; + } + +-auto void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index 590b41d8d7e35005..78ce890c0ff333ca 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -61,7 +61,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden; +@@ -291,9 +292,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rel *reloc, + const Elf32_Sym *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { +@@ -327,7 +329,8 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + # ifndef RTLD_BOOTSTRAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -498,10 +501,11 @@ and creates an unsatisfiable circular dependency.\n", + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -514,7 +518,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + # ifndef RESOLVE_CONFLICT_FIND_MAP + const Elf32_Sym *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -647,7 +652,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + # endif /* !RTLD_BOOTSTRAP */ + +-auto inline void ++static inline void + __attribute ((always_inline)) + elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + void *const reloc_addr_arg) +@@ -658,7 +663,7 @@ elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc, + } + + # ifndef RTLD_BOOTSTRAP +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -668,9 +673,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + } + # endif /* !RTLD_BOOTSTRAP */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rel *reloc, + int skip_ifunc) + { +@@ -705,13 +710,13 @@ elf_machine_lazy_rel (struct link_map *map, + const ElfW(Half) *const version = + (const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], + &map->l_versions[ndx], + (void *) (l_addr + r->r_offset), skip_ifunc); + } + # ifndef RTLD_BOOTSTRAP + else +- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, ++ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, + (void *) (l_addr + r->r_offset), skip_ifunc); + # endif + } +@@ -728,9 +733,9 @@ elf_machine_lazy_rel (struct link_map *map, + + # ifndef RTLD_BOOTSTRAP + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rela (struct link_map *map, ++elf_machine_lazy_rela (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +@@ -754,7 +759,8 @@ elf_machine_lazy_rela (struct link_map *map, + + /* Always initialize TLS descriptors completely at load time, in + case static TLS is allocated for it that requires locking. */ +- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, ++ skip_ifunc); + } + else if (__glibc_unlikely (r_type == R_386_IRELATIVE)) + { +diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h +index 4403e7767af83546..2217d0b556c17683 100644 +--- a/sysdeps/ia64/dl-machine.h ++++ b/sysdeps/ia64/dl-machine.h +@@ -44,8 +44,8 @@ __ia64_init_bootstrap_fdesc_table (struct link_map *map) + map->l_mach.fptr_table = boot_table; + } + +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ +- __ia64_init_bootstrap_fdesc_table (BOOTSTRAP_MAP); ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \ ++ __ia64_init_bootstrap_fdesc_table (map); + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -98,7 +98,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (void); + extern void _dl_runtime_profile (void); +@@ -371,9 +372,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + + /* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rela (struct link_map *map, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -414,10 +415,11 @@ elf_machine_rela (struct link_map *map, + return; + else + { +- struct link_map *sym_map; ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + /* RESOLVE_MAP() will return NULL if it fail to locate the symbol. */ +- if ((sym_map = RESOLVE_MAP (&sym, version, r_type))) ++ if (sym_map != NULL) + { + value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; + +@@ -476,7 +478,7 @@ elf_machine_rela (struct link_map *map, + can be skipped. */ + #define ELF_MACHINE_REL_RELATIVE 1 + +-auto inline void ++static inline void + __attribute ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -489,9 +491,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + } + + /* Perform a RELATIVE reloc on the .got entry that transfers to the .plt. */ +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h +index 86a8c67e2a1b9f77..5e34c4784e348b19 100644 +--- a/sysdeps/m68k/dl-machine.h ++++ b/sysdeps/m68k/dl-machine.h +@@ -68,7 +68,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -215,9 +216,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -228,7 +230,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -303,7 +306,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((unused, always_inline)) ++static inline void __attribute__ ((unused, always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -311,8 +314,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((unused, always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((unused, always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/microblaze/dl-machine.h b/sysdeps/microblaze/dl-machine.h +index e460f6f195561da1..3fd4988e6093be1c 100644 +--- a/sysdeps/microblaze/dl-machine.h ++++ b/sysdeps/microblaze/dl-machine.h +@@ -69,7 +69,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + extern void _dl_runtime_profile (Elf32_Word); +@@ -207,9 +208,10 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + ((unsigned short *)(rel_addr))[3] = (val) & 0xffff; \ + } while (0) + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -222,7 +224,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + value += reloc->r_addend; +@@ -277,7 +280,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -285,8 +288,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + PUT_REL_64 (reloc_addr, l_addr + reloc->r_addend); + } + +-auto inline void +-elf_machine_lazy_rel (struct link_map *map, ++static inline void ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/mips/dl-machine.h b/sysdeps/mips/dl-machine.h +index d9c6d33d0cbf1f50..7a821ceb8e518cef 100644 +--- a/sysdeps/mips/dl-machine.h ++++ b/sysdeps/mips/dl-machine.h +@@ -188,9 +188,9 @@ elf_machine_load_address (void) + + /* We can't rely on elf_machine_got_rel because _dl_object_relocation_scope + fiddles with global data. */ +-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \ ++#define ELF_MACHINE_BEFORE_RTLD_RELOC(bootstrap_map, dynamic_info) \ + do { \ +- struct link_map *map = BOOTSTRAP_MAP; \ ++ struct link_map *map = bootstrap_map; \ + ElfW(Sym) *sym; \ + ElfW(Addr) *got; \ + int i, n; \ +@@ -475,11 +475,12 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rel) *reloc, + by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *reloc_addr, ElfW(Addr) r_addend, int inplace_p) ++elf_machine_reloc (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) r_info, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *reloc_addr, ++ ElfW(Addr) r_addend, int inplace_p) + { + const unsigned long int r_type = ELFW(R_TYPE) (r_info); + ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr; +@@ -507,7 +508,8 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + case R_MIPS_TLS_TPREL32: + # endif + { +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + switch (r_type) + { +@@ -647,7 +649,7 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + _dl_signal_error (0, map->l_name, NULL, + "found jump slot relocation with non-zero addend"); + +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + *addr_field = value; + +@@ -661,7 +663,7 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + ElfW(Addr) value; + + /* Calculate the address of the symbol. */ +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (__builtin_expect (sym == NULL, 0)) +@@ -708,16 +710,17 @@ elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr, int skip_ifunc) ++elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rel) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, void *const reloc_addr, ++ int skip_ifunc) + { +- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr, 0, 1); ++ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr, 0, 1); + } + +-auto inline void ++static inline void + __attribute__((always_inline)) + elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + void *const reloc_addr) +@@ -725,9 +728,9 @@ elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + /* XXX Nothing to do. There is no relative relocation, right? */ + } + +-auto inline void ++static inline void + __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rel) *reloc, + int skip_ifunc) + { +@@ -748,17 +751,17 @@ elf_machine_lazy_rel (struct link_map *map, + _dl_reloc_bad_type (map, r_type, 1); + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], const ElfW(Rela) *reloc, + const ElfW(Sym) *sym, const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc) + { +- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr, ++ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr, + reloc->r_addend, 0); + } + +-auto inline void ++static inline void + __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr) +@@ -767,9 +770,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + + #ifndef RTLD_BOOTSTRAP + /* Relocate GOT. */ +-auto inline void ++static inline void + __attribute__((always_inline)) +-elf_machine_got_rel (struct link_map *map, int lazy) ++elf_machine_got_rel (struct link_map *map, struct r_scope_elem *scope[], int lazy) + { + ElfW(Addr) *got; + ElfW(Sym) *sym; +@@ -782,7 +785,7 @@ elf_machine_got_rel (struct link_map *map, int lazy) + const struct r_found_version *version __attribute__ ((unused)) \ + = vernum ? &map->l_versions[vernum[sym_index] & 0x7fff] : NULL; \ + struct link_map *sym_map; \ +- sym_map = RESOLVE_MAP (&ref, version, reloc); \ ++ sym_map = RESOLVE_MAP (map, scope, &ref, version, reloc); \ + SYMBOL_ADDRESS (sym_map, ref, true); \ + }) + +@@ -868,9 +871,10 @@ elf_machine_got_rel (struct link_map *map, int lazy) + /* Set up the loaded object described by L so its stub function + will jump to the on-demand fixup code __dl_runtime_resolve. */ + +-auto inline int ++static inline int + __attribute__((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + # ifndef RTLD_BOOTSTRAP + ElfW(Addr) *got; +@@ -900,7 +904,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + } + + /* Relocate global offset table. */ +- elf_machine_got_rel (l, lazy); ++ elf_machine_got_rel (l, scope, lazy); + + /* If using PLTs, fill in the first two entries of .got.plt. */ + if (l->l_info[DT_JMPREL] && lazy) +diff --git a/sysdeps/nios2/dl-machine.h b/sysdeps/nios2/dl-machine.h +index e000cd081f18a12b..4de602b13d5500f6 100644 +--- a/sysdeps/nios2/dl-machine.h ++++ b/sysdeps/nios2/dl-machine.h +@@ -67,7 +67,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + +@@ -234,10 +235,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. */ + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr_arg, int skip_ifunc) ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, ++ void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; + const unsigned int r_type = ELF32_R_TYPE (reloc->r_info); +@@ -249,7 +251,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + switch (r_type) +@@ -314,7 +317,7 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + } + } + +-auto inline void __attribute__((always_inline)) ++static inline void __attribute__((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) + { +@@ -322,8 +325,8 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index b93cf486b6cda5fd..cda012dc1b822254 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -170,7 +170,7 @@ extern int __elf_machine_runtime_setup (struct link_map *map, + int lazy, int profile); + + static inline int +-elf_machine_runtime_setup (struct link_map *map, ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], + int lazy, int profile) + { + if (map->l_info[DT_JMPREL] == 0) +@@ -284,9 +284,10 @@ extern void _dl_reloc_overflow (struct link_map *map, + LOADADDR is the load address of the object; INFO is an array indexed + by DT_* of the .dynamic section info. */ + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -315,7 +316,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + value += reloc->r_addend; +@@ -439,7 +440,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void __attribute__ ((always_inline)) ++static inline void __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -447,8 +448,8 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index b3f3352bcf5a52b0..3f92fbb369eb5023 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -343,7 +343,8 @@ dl_platform_init (void) + /* Set up the loaded object described by MAP so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + static inline int __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (map->l_info[DT_JMPREL]) + { +@@ -618,7 +619,7 @@ extern void attribute_hidden _dl_reloc_overflow (struct link_map *map, + Elf64_Addr *const reloc_addr, + const Elf64_Sym *refsym); + +-auto inline void __attribute__ ((always_inline)) ++static inline void __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) + { +@@ -627,7 +628,7 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + } + + /* This computes the value used by TPREL* relocs. */ +-auto inline Elf64_Addr __attribute__ ((always_inline, const)) ++static inline Elf64_Addr __attribute__ ((always_inline, const)) + elf_machine_tprel (struct link_map *map, + struct link_map *sym_map, + const Elf64_Sym *sym, +@@ -646,7 +647,7 @@ elf_machine_tprel (struct link_map *map, + } + + /* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */ +-auto inline Elf64_Addr __attribute__ ((always_inline)) ++static inline Elf64_Addr __attribute__ ((always_inline)) + resolve_ifunc (Elf64_Addr value, + const struct link_map *map, const struct link_map *sym_map) + { +@@ -676,8 +677,8 @@ resolve_ifunc (Elf64_Addr value, + + /* Perform the relocation specified by RELOC and SYM (which is fully + resolved). MAP is the object containing the reloc. */ +-auto inline void __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + const Elf64_Rela *reloc, + const Elf64_Sym *sym, + const struct r_found_version *version, +@@ -705,7 +706,7 @@ elf_machine_rela (struct link_map *map, + + /* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt + and STT_GNU_IFUNC. */ +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; + + if (sym != NULL +@@ -1035,8 +1036,8 @@ elf_machine_rela (struct link_map *map, + MODIFIED_CODE_NOQUEUE (reloc_addr); + } + +-auto inline void __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++static inline void __attribute__ ((always_inline)) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/riscv/dl-machine.h b/sysdeps/riscv/dl-machine.h +index 951268923da26a37..343c0feb6b437001 100644 +--- a/sysdeps/riscv/dl-machine.h ++++ b/sysdeps/riscv/dl-machine.h +@@ -168,17 +168,18 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, + by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, + void *const reloc_addr, int skip_ifunc) + { + ElfW(Addr) r_info = reloc->r_info; + const unsigned long int r_type = ELFW (R_TYPE) (r_info); + ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr; + const ElfW(Sym) *const __attribute__ ((unused)) refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + ElfW(Addr) value = 0; + if (sym_map != NULL) + value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend; +@@ -286,7 +287,7 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr) +@@ -294,10 +295,11 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *(ElfW(Addr) *) reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, +- const ElfW(Rela) *reloc, int skip_ifunc) ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], ++ ElfW(Addr) l_addr, const ElfW(Rela) *reloc, ++ int skip_ifunc) + { + ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info); +@@ -327,9 +329,10 @@ elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr, + /* Set up the loaded object described by L so its stub function + will jump to the on-demand fixup code __dl_runtime_resolve. */ + +-auto inline int ++static inline int + __attribute__ ((always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + #ifndef RTLD_BOOTSTRAP + /* If using PLTs, fill in the first two entries of .got.plt. */ +diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h +index d0ccd69261c8f55b..96a5e80c846c816a 100644 +--- a/sysdeps/s390/s390-32/dl-machine.h ++++ b/sysdeps/s390/s390-32/dl-machine.h +@@ -85,7 +85,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf32_Word); + extern void _dl_runtime_profile (Elf32_Word); +@@ -321,10 +322,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -357,7 +359,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + /* Only needed for R_390_COPY below. */ + const Elf32_Sym *const refsym = sym; + #endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -484,7 +487,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -493,9 +496,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h +index 543361c83637c071..c94d09b9c8512738 100644 +--- a/sysdeps/s390/s390-64/dl-machine.h ++++ b/sysdeps/s390/s390-64/dl-machine.h +@@ -75,7 +75,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + extern void _dl_runtime_resolve (Elf64_Word); + extern void _dl_runtime_profile (Elf64_Word); +@@ -268,10 +269,11 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, +- const Elf64_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf64_Rela *reloc, const Elf64_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf64_Addr *const reloc_addr = reloc_addr_arg; +@@ -304,7 +306,8 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + /* Only needed for R_390_COPY below. */ + const Elf64_Sym *const refsym = sym; + #endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -438,7 +441,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -447,9 +450,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h +index 122b417a17e2ef9b..0c22dfd8487a516e 100644 +--- a/sysdeps/sh/dl-machine.h ++++ b/sysdeps/sh/dl-machine.h +@@ -69,7 +69,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *got; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -259,10 +260,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -318,7 +320,8 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + else + { + const Elf32_Sym *const refsym = sym; +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + + value = SYMBOL_ADDRESS (sym_map, sym, true); + value += reloc->r_addend; +@@ -424,7 +427,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -443,9 +446,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + #undef COPY_UNALIGNED_WORD + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h +index 0269e458ea2b3bca..6361cfae9eb8fa58 100644 +--- a/sysdeps/sparc/sparc32/dl-machine.h ++++ b/sysdeps/sparc/sparc32/dl-machine.h +@@ -97,7 +97,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf32_Addr *plt; + extern void _dl_runtime_resolve (Elf32_Word); +@@ -327,10 +328,11 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, +- const Elf32_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf32_Rela *reloc, const Elf32_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf32_Addr *const reloc_addr = reloc_addr_arg; +@@ -381,7 +383,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + #else +@@ -536,7 +538,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + void *const reloc_addr_arg) +@@ -545,9 +547,9 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc, + *reloc_addr += l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf32_Addr l_addr, const Elf32_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h +index bbd4566d8a595f93..3fd18c6e5ef21e38 100644 +--- a/sysdeps/sparc/sparc64/dl-machine.h ++++ b/sysdeps/sparc/sparc64/dl-machine.h +@@ -126,7 +126,8 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + if (l->l_info[DT_JMPREL] && lazy) + { +@@ -354,10 +355,11 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, +- const Elf64_Sym *sym, const struct r_found_version *version, ++elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], ++ const Elf64_Rela *reloc, const Elf64_Sym *sym, ++ const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) + { + Elf64_Addr *const reloc_addr = reloc_addr_arg; +@@ -408,7 +410,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + else + { +- sym_map = RESOLVE_MAP (&sym, version, r_type); ++ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type); + value = SYMBOL_ADDRESS (sym_map, sym, true); + } + #else +@@ -646,7 +648,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + } + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +@@ -655,9 +657,9 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + *reloc_addr = l_addr + reloc->r_addend; + } + +-auto inline void ++static inline void + __attribute__ ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) + { +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index a8596aa3fa489eff..d3fcbb37bf1f4f7c 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -62,7 +62,8 @@ elf_machine_load_address (void) + entries will jump to the on-demand fixup code in dl-runtime.c. */ + + static inline int __attribute__ ((unused, always_inline)) +-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) ++elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], ++ int lazy, int profile) + { + Elf64_Addr *got; + extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; +@@ -258,12 +259,11 @@ elf_machine_plt_value (struct link_map *map, const ElfW(Rela) *reloc, + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. */ + +-auto inline void +-__attribute__ ((always_inline)) +-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, +- const ElfW(Sym) *sym, const struct r_found_version *version, +- void *const reloc_addr_arg, int skip_ifunc) +-{ ++static inline void __attribute__((always_inline)) ++elf_machine_rela(struct link_map *map, struct r_scope_elem *scope[], ++ const ElfW(Rela) *reloc, const ElfW(Sym) *sym, ++ const struct r_found_version *version, ++ void *const reloc_addr_arg, int skip_ifunc) { + ElfW(Addr) *const reloc_addr = reloc_addr_arg; + const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info); + +@@ -300,7 +300,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + # ifndef RTLD_BOOTSTRAP + const ElfW(Sym) *const refsym = sym; + # endif +- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type); ++ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, ++ r_type); + ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true); + + if (sym != NULL +@@ -525,7 +526,7 @@ and creates an unsatisfiable circular dependency.\n", + } + } + +-auto inline void ++static inline void + __attribute ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) +@@ -544,9 +545,9 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + } + } + +-auto inline void ++static inline void + __attribute ((always_inline)) +-elf_machine_lazy_rel (struct link_map *map, ++elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + int skip_ifunc) + { +@@ -580,7 +581,7 @@ elf_machine_lazy_rel (struct link_map *map, + + /* Always initialize TLS descriptors completely at load time, in + case static TLS is allocated for it that requires locking. */ +- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc); ++ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, skip_ifunc); + } + else if (__glibc_unlikely (r_type == R_X86_64_IRELATIVE)) + { diff --git a/glibc-upstream-2.34-137.patch b/glibc-upstream-2.34-137.patch new file mode 100644 index 0000000..06aaa91 --- /dev/null +++ b/glibc-upstream-2.34-137.patch @@ -0,0 +1,241 @@ +commit c6df39a0bd2aafd2a4280a0000ef201f30273bee +Author: Adhemerval Zanella +Date: Mon Oct 11 16:01:49 2021 -0300 + + elf: Fix elf_get_dynamic_info definition + + Before to 490e6c62aa31a8a ('elf: Avoid nested functions in the loader + [BZ #27220]'), elf_get_dynamic_info() was defined twice on rtld.c: on + the first dynamic-link.h include and later within _dl_start(). The + former definition did not define DONT_USE_BOOTSTRAP_MAP and it is used + on setup_vdso() (since it is a global definition), while the former does + define DONT_USE_BOOTSTRAP_MAP and it is used on loader self-relocation. + + With the commit change, the function is now included and defined once + instead of defined as a nested function. So rtld.c defines without + defining RTLD_BOOTSTRAP and it brokes at least powerpc32. + + This patch fixes by moving the get-dynamic-info.h include out of + dynamic-link.h, which then the caller can corirectly set the expected + semantic by defining STATIC_PIE_BOOTSTRAP, RTLD_BOOTSTRAP, and/or + RESOLVE_MAP. + + It also required to enable some asserts only for the loader bootstrap + to avoid issues when called from setup_vdso(). + + As a side note, this is another issues with nested functions: it is + not clear from pre-processed output (-E -dD) how the function will + be build and its semantic (since nested function will be local and + extra C defines may change it). + + I checked on x86_64-linux-gnu (w/o --enable-static-pie), + i686-linux-gnu, powerpc64-linux-gnu, powerpc-linux-gnu-power4, + aarch64-linux-gnu, arm-linux-gnu, sparc64-linux-gnu, and + s390x-linux-gnu. + + Reviewed-by: Fangrui Song + (cherry picked from commit 4af6982e4c9fc465ffb7a54b794aaaa134241f05) + + Resolved conflicts: + elf/rtld.c + +diff --git a/elf/dl-conflict.c b/elf/dl-conflict.c +index 5c8e51d19ae095d6..d54356dee3f86ae0 100644 +--- a/elf/dl-conflict.c ++++ b/elf/dl-conflict.c +@@ -17,6 +17,7 @@ + License along with the GNU C Library; see the file COPYING.LIB. If + not, see . */ + ++#include + #include + #include + #include +diff --git a/elf/dl-load.c b/elf/dl-load.c +index 0976977fbdf21902..eea06629a978aaf3 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -58,6 +58,7 @@ struct filebuf + }; + + #include "dynamic-link.h" ++#include "get-dynamic-info.h" + #include + #include + #include +@@ -1295,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + if (l->l_ld != 0) + l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr); + +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false); + + /* Make sure we are not dlopen'ing an object that has the + DF_1_NOOPEN flag set, or a PIE object. */ +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index a52ba8aeb8b573cb..f323b4dd0d5ba279 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -28,6 +28,7 @@ + #define STATIC_PIE_BOOTSTRAP + #define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" ++#include "get-dynamic-info.h" + + /* Relocate static executable with PIE. */ + +@@ -51,7 +52,7 @@ _dl_relocate_static_pie (void) + break; + } + +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC + ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index 9d0d941000f6114f..61c260ddb81b586c 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -19,6 +19,7 @@ + #define IN_DL_RUNTIME 1 /* This can be tested in dl-machine.h. */ + + #include ++#include + #include + #include + #include +diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h +index 7cc30211649d3820..21cdfc88bbfb89ea 100644 +--- a/elf/dynamic-link.h ++++ b/elf/dynamic-link.h +@@ -93,7 +93,6 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + + #include + +-#include "get-dynamic-info.h" + + #ifdef RESOLVE_MAP + +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index 15c316b38c05a90c..d169099fbc9897cf 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -25,7 +25,7 @@ + #include + + static inline void __attribute__ ((unused, always_inline)) +-elf_get_dynamic_info (struct link_map *l) ++elf_get_dynamic_info (struct link_map *l, bool check) + { + #if __ELF_NATIVE_CLASS == 32 + typedef Elf32_Word d_tag_utype; +@@ -112,16 +112,19 @@ elf_get_dynamic_info (struct link_map *l) + assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel))); + #endif + #ifdef RTLD_BOOTSTRAP +- /* Only the bind now flags are allowed. */ +- assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL +- || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); +- /* Flags must not be set for ld.so. */ +- assert (info[DT_FLAGS] == NULL +- || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); +-#endif +-#if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); ++ if (check) ++ { ++ /* Only the bind now flags are allowed. */ ++ assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL ++ || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); ++ /* Flags must not be set for ld.so. */ ++ assert (info[DT_FLAGS] == NULL ++ || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); ++# ifdef STATIC_PIE_BOOTSTRAP ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); ++# endif ++ } + #else + if (info[DT_FLAGS] != NULL) + { +diff --git a/elf/rtld.c b/elf/rtld.c +index ee45657aeac14f3c..352d596dedb42e79 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -514,6 +514,7 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + is trivial: always the map of ld.so itself. */ + #define RTLD_BOOTSTRAP + #define RESOLVE_MAP(map, scope, sym, version, flags) map ++#include "get-dynamic-info.h" + #include "dynamic-link.h" + + static ElfW(Addr) __attribute_used__ +@@ -549,7 +550,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map); ++ elf_get_dynamic_info (&bootstrap_map, true); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1653,7 +1654,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false); + + /* If the main map is libc.so, update the base namespace to + refer to this map. If libc.so is loaded later, this happens +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index f44748bc9858e5fd..6fdffafcca5e9916 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + l->l_map_end += l->l_addr; + l->l_text_end += l->l_addr; + l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr); +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false); + _dl_setup_hash (l); + l->l_relocated = 1; + +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index 7e6761bbe87540d5..86f866ca7c17bd9b 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -21,6 +21,7 @@ + + #define ELF_MACHINE_NAME "ARM" + ++#include + #include + #include + #include +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index 78ce890c0ff333ca..fa902612ca8557f9 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -21,6 +21,7 @@ + + #define ELF_MACHINE_NAME "i386" + ++#include + #include + #include + #include +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index d3fcbb37bf1f4f7c..90c77cfea1de8d63 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -22,6 +22,7 @@ + + #define ELF_MACHINE_NAME "x86_64" + ++#include + #include + #include + #include diff --git a/glibc-upstream-2.34-138.patch b/glibc-upstream-2.34-138.patch new file mode 100644 index 0000000..778ada9 --- /dev/null +++ b/glibc-upstream-2.34-138.patch @@ -0,0 +1,1115 @@ +commit b868b45f6763a4adc4aa93248be9f84480768fcf +Author: Adhemerval Zanella +Date: Wed Oct 13 09:49:34 2021 -0300 + + elf: Fix dynamic-link.h usage on rtld.c + + The 4af6982e4c fix does not fully handle RTLD_BOOTSTRAP usage on + rtld.c due two issues: + + 1. RTLD_BOOTSTRAP is also used on dl-machine.h on various + architectures and it changes the semantics of various machine + relocation functions. + + 2. The elf_get_dynamic_info() change was done sideways, previously + to 490e6c62aa get-dynamic-info.h was included by the first + dynamic-link.h include *without* RTLD_BOOTSTRAP being defined. + It means that the code within elf_get_dynamic_info() that uses + RTLD_BOOTSTRAP is in fact unused. + + To fix 1. this patch now includes dynamic-link.h only once with + RTLD_BOOTSTRAP defined. The ELF_DYNAMIC_RELOCATE call will now have + the relocation fnctions with the expected semantics for the loader. + + And to fix 2. part of 4af6982e4c is reverted (the check argument + elf_get_dynamic_info() is not required) and the RTLD_BOOTSTRAP + pieces are removed. + + To reorganize the includes the static TLS definition is moved to + its own header to avoid a circular dependency (it is defined on + dynamic-link.h and dl-machine.h requires it at same time other + dynamic-link.h definition requires dl-machine.h defitions). + + Also ELF_MACHINE_NO_REL, ELF_MACHINE_NO_RELA, and ELF_MACHINE_PLT_REL + are moved to its own header. Only ancient ABIs need special values + (arm, i386, and mips), so a generic one is used as default. + + The powerpc Elf64_FuncDesc is also moved to its own header, since + csu code required its definition (which would require either include + elf/ folder or add a full path with elf/). + + Checked on x86_64, i686, aarch64, armhf, powerpc64, powerpc32, + and powerpc64le. + + Reviewed-by: Szabolcs Nagy + (cherry picked from commit d6d89608ac8cf2b37c75debad1fff653f6939f90) + + Resolved conflicts: + elf/rtld.c + +Conflicts: + elf/rtld.c - Manual merge around dl-execve.h include. + +diff --git a/elf/dl-load.c b/elf/dl-load.c +index eea06629a978aaf3..fb3da5aa565908a6 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1296,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + if (l->l_ld != 0) + l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr); + +- elf_get_dynamic_info (l, false); ++ elf_get_dynamic_info (l); + + /* Make sure we are not dlopen'ing an object that has the + DF_1_NOOPEN flag set, or a PIE object. */ +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index f323b4dd0d5ba279..ababafcf98f9945d 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -52,7 +52,7 @@ _dl_relocate_static_pie (void) + break; + } + +- elf_get_dynamic_info (main_map, false); ++ elf_get_dynamic_info (main_map); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC + ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); +diff --git a/elf/dl-static-tls.h b/elf/dl-static-tls.h +new file mode 100644 +index 0000000000000000..730924fc0155acb7 +--- /dev/null ++++ b/elf/dl-static-tls.h +@@ -0,0 +1,51 @@ ++/* Inline functions for dynamic linking. ++ Copyright (C) 1995-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_STATIC_TLS_H ++#define _DL_STATIC_TLS_H ++ ++/* This macro is used as a callback from elf_machine_rel{a,} when a ++ static TLS reloc is about to be performed. Since (in dl-load.c) we ++ permit dynamic loading of objects that might use such relocs, we ++ have to check whether each use is actually doable. If the object ++ whose TLS segment the reference resolves to was allocated space in ++ the static TLS block at startup, then it's ok. Otherwise, we make ++ an attempt to allocate it in surplus space on the fly. If that ++ can't be done, we fall back to the error that DF_STATIC_TLS is ++ intended to produce. */ ++#define HAVE_STATIC_TLS(map, sym_map) \ ++ (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \ ++ && ((sym_map)->l_tls_offset \ ++ != FORCED_DYNAMIC_TLS_OFFSET), 1)) ++ ++#define CHECK_STATIC_TLS(map, sym_map) \ ++ do { \ ++ if (!HAVE_STATIC_TLS (map, sym_map)) \ ++ _dl_allocate_static_tls (sym_map); \ ++ } while (0) ++ ++#define TRY_STATIC_TLS(map, sym_map) \ ++ (__builtin_expect ((sym_map)->l_tls_offset \ ++ != FORCED_DYNAMIC_TLS_OFFSET, 1) \ ++ && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \ ++ || _dl_try_allocate_static_tls (sym_map, true) == 0)) ++ ++int _dl_try_allocate_static_tls (struct link_map *map, bool optional) ++ attribute_hidden; ++ ++#endif +diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h +index 21cdfc88bbfb89ea..ac4cc70dea3da763 100644 +--- a/elf/dynamic-link.h ++++ b/elf/dynamic-link.h +@@ -16,35 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + +-/* This macro is used as a callback from elf_machine_rel{a,} when a +- static TLS reloc is about to be performed. Since (in dl-load.c) we +- permit dynamic loading of objects that might use such relocs, we +- have to check whether each use is actually doable. If the object +- whose TLS segment the reference resolves to was allocated space in +- the static TLS block at startup, then it's ok. Otherwise, we make +- an attempt to allocate it in surplus space on the fly. If that +- can't be done, we fall back to the error that DF_STATIC_TLS is +- intended to produce. */ +-#define HAVE_STATIC_TLS(map, sym_map) \ +- (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \ +- && ((sym_map)->l_tls_offset \ +- != FORCED_DYNAMIC_TLS_OFFSET), 1)) +- +-#define CHECK_STATIC_TLS(map, sym_map) \ +- do { \ +- if (!HAVE_STATIC_TLS (map, sym_map)) \ +- _dl_allocate_static_tls (sym_map); \ +- } while (0) +- +-#define TRY_STATIC_TLS(map, sym_map) \ +- (__builtin_expect ((sym_map)->l_tls_offset \ +- != FORCED_DYNAMIC_TLS_OFFSET, 1) \ +- && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \ +- || _dl_try_allocate_static_tls (sym_map, true) == 0)) +- +-int _dl_try_allocate_static_tls (struct link_map *map, bool optional) +- attribute_hidden; +- ++#include + #include + + #ifdef RESOLVE_MAP +@@ -91,9 +63,6 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + # endif + #endif + +-#include +- +- + #ifdef RESOLVE_MAP + + # if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index d169099fbc9897cf..1ac0663d1ff5de24 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -22,10 +22,11 @@ + #define _GET_DYNAMIC_INFO_H + + #include ++#include + #include + + static inline void __attribute__ ((unused, always_inline)) +-elf_get_dynamic_info (struct link_map *l, bool check) ++elf_get_dynamic_info (struct link_map *l) + { + #if __ELF_NATIVE_CLASS == 32 + typedef Elf32_Word d_tag_utype; +@@ -33,7 +34,7 @@ elf_get_dynamic_info (struct link_map *l, bool check) + typedef Elf64_Xword d_tag_utype; + #endif + +-#if !defined RTLD_BOOTSTRAP && !defined STATIC_PIE_BOOTSTRAP ++#ifndef STATIC_PIE_BOOTSTRAP + if (l->l_ld == NULL) + return; + #endif +@@ -111,21 +112,10 @@ elf_get_dynamic_info (struct link_map *l, bool check) + if (info[DT_REL] != NULL) + assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel))); + #endif +-#ifdef RTLD_BOOTSTRAP +- if (check) +- { +- /* Only the bind now flags are allowed. */ +- assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL +- || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); +- /* Flags must not be set for ld.so. */ +- assert (info[DT_FLAGS] == NULL +- || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); +-# ifdef STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); +-# endif +- } +-#else ++#ifdef STATIC_PIE_BOOTSTRAP ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); ++#endif + if (info[DT_FLAGS] != NULL) + { + /* Flags are used. Translate to the old form where available. +@@ -163,7 +153,6 @@ elf_get_dynamic_info (struct link_map *l, bool check) + if (info[DT_RUNPATH] != NULL) + /* If both RUNPATH and RPATH are given, the latter is ignored. */ + info[DT_RPATH] = NULL; +-#endif + } + + #endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 352d596dedb42e79..37d28d5a66d7b5d6 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -32,7 +32,6 @@ + #include + #include + #include +-#include "dynamic-link.h" + #include + #include + #include +@@ -51,9 +50,18 @@ + #include + #include + #include ++#include + + #include + ++/* This #define produces dynamic linking inline functions for ++ bootstrap relocation instead of general-purpose relocation. ++ Since ld.so must not have any undefined symbols the result ++ is trivial: always the map of ld.so itself. */ ++#define RTLD_BOOTSTRAP ++#define RESOLVE_MAP(map, scope, sym, version, flags) map ++#include "dynamic-link.h" ++ + /* Only enables rtld profiling for architectures which provides non generic + hp-timing support. The generic support requires either syscall + (clock_gettime), which will incur in extra overhead on loading time. +@@ -508,15 +516,6 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) + # define bootstrap_map info.l + #endif + +- /* This #define produces dynamic linking inline functions for +- bootstrap relocation instead of general-purpose relocation. +- Since ld.so must not have any undefined symbols the result +- is trivial: always the map of ld.so itself. */ +-#define RTLD_BOOTSTRAP +-#define RESOLVE_MAP(map, scope, sym, version, flags) map +-#include "get-dynamic-info.h" +-#include "dynamic-link.h" +- + static ElfW(Addr) __attribute_used__ + _dl_start (void *arg) + { +@@ -550,7 +549,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map, true); ++ elf_get_dynamic_info (&bootstrap_map); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1654,7 +1653,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map, false); ++ elf_get_dynamic_info (main_map); + + /* If the main map is libc.so, update the base namespace to + refer to this map. If libc.so is loaded later, this happens +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index 6fdffafcca5e9916..f44748bc9858e5fd 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + l->l_map_end += l->l_addr; + l->l_text_end += l->l_addr; + l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr); +- elf_get_dynamic_info (l, false); ++ elf_get_dynamic_info (l); + _dl_setup_hash (l); + l->l_relocated = 1; + +diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h +index 34c0790b893a529b..07af183e711b50f2 100644 +--- a/sysdeps/aarch64/dl-machine.h ++++ b/sysdeps/aarch64/dl-machine.h +@@ -24,7 +24,9 @@ + #include + #include + #include ++#include + #include ++#include + #include + + /* Translate a processor specific dynamic tag to the index in l_info array. */ +@@ -196,10 +198,6 @@ _dl_start_user: \n\ + + #define ELF_MACHINE_JMP_SLOT AARCH64_R(JUMP_SLOT) + +-/* AArch64 uses RELA not REL */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + #define DL_PLATFORM_INIT dl_platform_init () + + static inline void __attribute__ ((unused)) +@@ -376,7 +374,7 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + } + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, + const ElfW(Rela) *reloc, +diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h +index 66e1db524bb378f6..e948e54fb7223a18 100644 +--- a/sysdeps/alpha/dl-machine.h ++++ b/sysdeps/alpha/dl-machine.h +@@ -26,6 +26,8 @@ + #define ELF_MACHINE_NAME "alpha" + + #include ++#include ++#include + + + /* Mask identifying addresses reserved for the user program, +@@ -241,10 +243,6 @@ $fixup_stack: \n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT + +-/* The alpha never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + * _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/arc/dl-machine.h b/sysdeps/arc/dl-machine.h +index 4b64ffec256b7f3b..f843ed9bd6ff5fc2 100644 +--- a/sysdeps/arc/dl-machine.h ++++ b/sysdeps/arc/dl-machine.h +@@ -30,6 +30,8 @@ + #include + #include + #include ++#include ++#include + + /* Dynamic Linking ABI for ARCv2 ISA. + +@@ -203,10 +205,6 @@ __start: \n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ARC_JUMP_SLOT + +-/* ARC uses Rela relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Fixup a PLT entry to bounce directly to the function at VALUE. */ + + static inline ElfW(Addr) +@@ -318,7 +316,7 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], + } + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + void *const reloc_addr_arg) +@@ -327,7 +325,7 @@ elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc, + *reloc_addr += l_addr; + } + +-inline void ++static inline void + __attribute__ ((always_inline)) + elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[], + ElfW(Addr) l_addr, const ElfW(Rela) *reloc, +diff --git a/sysdeps/arm/dl-machine-rel.h b/sysdeps/arm/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..bec114706cd027a4 +--- /dev/null ++++ b/sysdeps/arm/dl-machine-rel.h +@@ -0,0 +1,31 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* ARM never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP ++#define ELF_MACHINE_NO_REL 0 ++ ++/* ARM never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_PLT_REL 1 ++ ++#endif +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index 86f866ca7c17bd9b..3239841eb5b36623 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #ifndef CLEAR_CACHE + # error CLEAR_CACHE definition required to handle TEXTREL +@@ -258,10 +260,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_ARM_JUMP_SLOT + +-/* ARM never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_PLT_REL 1 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +@@ -294,11 +292,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + #endif /* !dl_machine_h */ + + +-/* ARM never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP +-#define ELF_MACHINE_NO_REL 0 +- + /* Names of the architecture-specific auditing callback functions. */ + #define ARCH_LA_PLTENTER arm_gnu_pltenter + #define ARCH_LA_PLTEXIT arm_gnu_pltexit +diff --git a/sysdeps/csky/dl-machine.h b/sysdeps/csky/dl-machine.h +index ec22f875772b1291..4dfd9578773f1c8e 100644 +--- a/sysdeps/csky/dl-machine.h ++++ b/sysdeps/csky/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero if ELF header is compatible with the running host. */ + static inline int +@@ -172,10 +174,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_CKCORE_JUMP_SLOT + +-/* C-SKY never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/generic/dl-machine-rel.h b/sysdeps/generic/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..9167a1dffc715704 +--- /dev/null ++++ b/sysdeps/generic/dl-machine-rel.h +@@ -0,0 +1,27 @@ ++/* ELF dynamic relocation type supported by the architecture. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* Defined if the architecture supports Elf{32,64}_Rel relocations. */ ++#define ELF_MACHINE_NO_REL 1 ++/* Defined if the architecture supports Elf{32,64}_Rela relocations. */ ++#define ELF_MACHINE_NO_RELA 0 ++ ++#endif +diff --git a/sysdeps/generic/dl-machine.h b/sysdeps/generic/dl-machine.h +index 4a4ab4fc70ff1cf1..7da695d9030b000e 100644 +--- a/sysdeps/generic/dl-machine.h ++++ b/sysdeps/generic/dl-machine.h +@@ -20,6 +20,8 @@ + + #include + #include ++#include ++#include + + + /* Return nonzero iff ELF header is compatible with the running host. */ +diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h +index 088931f67065250c..ac66f044189edd18 100644 +--- a/sysdeps/hppa/dl-machine.h ++++ b/sysdeps/hppa/dl-machine.h +@@ -31,6 +31,8 @@ + #include + #include + #include ++#include ++#include + + /* These two definitions must match the definition of the stub in + bfd/elf32-hppa.c (see plt_stub[]). +@@ -525,10 +527,6 @@ asm ( \ + #define ELF_MACHINE_JMP_SLOT R_PARISC_IPLT + #define ELF_MACHINE_SIZEOF_JMP_SLOT PLT_ENTRY_SIZE + +-/* We only use RELA. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return the address of the entry point. */ + #define ELF_MACHINE_START_ADDRESS(map, start) \ + ({ \ +diff --git a/sysdeps/i386/dl-machine-rel.h b/sysdeps/i386/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..7ac46f78a69fbf98 +--- /dev/null ++++ b/sysdeps/i386/dl-machine-rel.h +@@ -0,0 +1,31 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++/* The i386 never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP ++#define ELF_MACHINE_NO_REL 0 ++ ++/* The i386 never uses Elf32_Rela relocations for the dynamic linker. ++ Prelinked libraries may use Elf32_Rela though. */ ++#define ELF_MACHINE_PLT_REL 1 ++ ++#endif +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index fa902612ca8557f9..c55c9a3d64bed1f2 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -237,10 +239,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_386_JMP_SLOT + +-/* The i386 never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_PLT_REL 1 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +@@ -283,11 +281,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc, + + #endif /* !dl_machine_h */ + +-/* The i386 never uses Elf32_Rela relocations for the dynamic linker. +- Prelinked libraries may use Elf32_Rela though. */ +-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP +-#define ELF_MACHINE_NO_REL 0 +- + #ifdef RESOLVE_MAP + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). +diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h +index 2217d0b556c17683..c9608a51b0291164 100644 +--- a/sysdeps/ia64/dl-machine.h ++++ b/sysdeps/ia64/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ +@@ -319,10 +321,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_IA64_IPLTLSB + +-/* According to the IA-64 specific documentation, Rela is always used. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return the address of the entry point. */ + #define ELF_MACHINE_START_ADDRESS(map, start) \ + ({ \ +diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h +index 5e34c4784e348b19..30323d62d443645a 100644 +--- a/sysdeps/m68k/dl-machine.h ++++ b/sysdeps/m68k/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -183,10 +185,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_68K_JMP_SLOT + +-/* The m68k never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + static inline Elf32_Addr + elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Sym) *refsym, const ElfW(Sym) *sym, +diff --git a/sysdeps/microblaze/dl-machine.h b/sysdeps/microblaze/dl-machine.h +index 3fd4988e6093be1c..b8cc5a7fe65af90a 100644 +--- a/sysdeps/microblaze/dl-machine.h ++++ b/sysdeps/microblaze/dl-machine.h +@@ -23,6 +23,8 @@ + + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -169,10 +171,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_MICROBLAZE_JUMP_SLOT + +-/* The microblaze never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + static inline Elf32_Addr + elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const ElfW(Sym) *refsym, const ElfW(Sym) *sym, +diff --git a/sysdeps/mips/dl-machine-rel.h b/sysdeps/mips/dl-machine-rel.h +new file mode 100644 +index 0000000000000000..ed396180412bc723 +--- /dev/null ++++ b/sysdeps/mips/dl-machine-rel.h +@@ -0,0 +1,26 @@ ++/* ELF dynamic relocation type supported by the architecture. ARM version. ++ Copyright (C) 2001-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_MACHINE_REL_H ++#define _DL_MACHINE_REL_H ++ ++#define ELF_MACHINE_PLT_REL 1 ++#define ELF_MACHINE_NO_REL 0 ++#define ELF_MACHINE_NO_RELA 0 ++ ++#endif +diff --git a/sysdeps/mips/dl-machine.h b/sysdeps/mips/dl-machine.h +index 7a821ceb8e518cef..45a394907a98be32 100644 +--- a/sysdeps/mips/dl-machine.h ++++ b/sysdeps/mips/dl-machine.h +@@ -33,6 +33,8 @@ + #include + #include + #include ++#include ++#include + + /* The offset of gp from GOT might be system-dependent. It's set by + ld. The same value is also */ +@@ -60,10 +62,6 @@ + ((((type) == ELF_MACHINE_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_MIPS_COPY) * ELF_RTYPE_CLASS_COPY)) + +-#define ELF_MACHINE_PLT_REL 1 +-#define ELF_MACHINE_NO_REL 0 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Translate a processor specific dynamic tag to the index + in l_info array. */ + #define DT_MIPS(x) (DT_MIPS_##x - DT_LOPROC + DT_NUM) +diff --git a/sysdeps/nios2/dl-machine.h b/sysdeps/nios2/dl-machine.h +index 4de602b13d5500f6..430ca5d7ae1e0372 100644 +--- a/sysdeps/nios2/dl-machine.h ++++ b/sysdeps/nios2/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -200,10 +202,6 @@ _start:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_NIOS2_JUMP_SLOT + +-/* The Nios II never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Fixup a PLT entry to bounce directly to the function at VALUE. */ + + static inline Elf32_Addr +diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h +index cda012dc1b822254..8d062951ce0abd69 100644 +--- a/sysdeps/powerpc/powerpc32/dl-machine.h ++++ b/sysdeps/powerpc/powerpc32/dl-machine.h +@@ -25,6 +25,8 @@ + #include + #include + #include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ +@@ -145,10 +147,6 @@ __elf_preferred_address(struct link_map *loader, size_t maplength, + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_PPC_JMP_SLOT + +-/* The PowerPC never uses REL relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +diff --git a/sysdeps/powerpc/powerpc64/dl-funcdesc.h b/sysdeps/powerpc/powerpc64/dl-funcdesc.h +new file mode 100644 +index 0000000000000000..b2d1f76ce02d629e +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/dl-funcdesc.h +@@ -0,0 +1,34 @@ ++/* PowerPC ELFv1 function descriptor definition. ++ Copyright (C) 2009-2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FUNCDESC_H ++#define _DL_FUNCDESC_H ++ ++#if _CALL_ELF != 2 ++/* A PowerPC64 function descriptor. The .plt (procedure linkage ++ table) and .opd (official procedure descriptor) sections are ++ arrays of these. */ ++typedef struct ++{ ++ Elf64_Addr fd_func; ++ Elf64_Addr fd_toc; ++ Elf64_Addr fd_aux; ++} Elf64_FuncDesc; ++#endif ++ ++#endif +diff --git a/sysdeps/powerpc/powerpc64/dl-irel.h b/sysdeps/powerpc/powerpc64/dl-irel.h +index 0e11b7ff647c19d5..aa9a2dca71c3b05f 100644 +--- a/sysdeps/powerpc/powerpc64/dl-irel.h ++++ b/sysdeps/powerpc/powerpc64/dl-irel.h +@@ -23,7 +23,7 @@ + #include + #include + #include +-#include ++#include + + #define ELF_MACHINE_IRELA 1 + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index 3f92fbb369eb5023..3a4a21a4124dc72a 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -28,23 +28,14 @@ + #include + #include + #include ++#include ++#include ++#include + + /* Translate a processor specific dynamic tag to the index + in l_info array. */ + #define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM) + +-#if _CALL_ELF != 2 +-/* A PowerPC64 function descriptor. The .plt (procedure linkage +- table) and .opd (official procedure descriptor) sections are +- arrays of these. */ +-typedef struct +-{ +- Elf64_Addr fd_func; +- Elf64_Addr fd_toc; +- Elf64_Addr fd_aux; +-} Elf64_FuncDesc; +-#endif +- + #define ELF_MULT_MACHINES_SUPPORTED + + /* Return nonzero iff ELF header is compatible with the running host. */ +@@ -292,10 +283,6 @@ BODY_PREFIX "_dl_start_user:\n" \ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT + +-/* The PowerPC never uses REL relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function to initialize HWCAP/HWCAP2 and + platform data so it can be copied into the TCB later. This is called + very early in _dl_sysdep_start for dynamically linked binaries. */ +diff --git a/sysdeps/riscv/dl-machine.h b/sysdeps/riscv/dl-machine.h +index 343c0feb6b437001..a9a3f63cb4d91f26 100644 +--- a/sysdeps/riscv/dl-machine.h ++++ b/sysdeps/riscv/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #ifndef _RTLD_PROLOGUE + # define _RTLD_PROLOGUE(entry) \ +@@ -51,9 +53,6 @@ + || (__WORDSIZE == 64 && (type) == R_RISCV_TLS_TPREL64))) \ + | (ELF_RTYPE_CLASS_COPY * ((type) == R_RISCV_COPY))) + +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute_used__ + elf_machine_matches_host (const ElfW(Ehdr) *ehdr) +diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h +index 96a5e80c846c816a..ba681d1eac7bda53 100644 +--- a/sysdeps/s390/s390-32/dl-machine.h ++++ b/sysdeps/s390/s390-32/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* This is an older, now obsolete value. */ + #define EM_S390_OLD 0xA390 +@@ -277,10 +279,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT + +-/* The S390 never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h +index c94d09b9c8512738..af2cffd9f904274e 100644 +--- a/sysdeps/s390/s390-64/dl-machine.h ++++ b/sysdeps/s390/s390-64/dl-machine.h +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + + #define ELF_MACHINE_IRELATIVE R_390_IRELATIVE + +@@ -225,10 +227,6 @@ _dl_start_user:\n\ + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT + +-/* The 64 bit S/390 never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization functions. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () +diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h +index 0c22dfd8487a516e..d14023e7492f64e9 100644 +--- a/sysdeps/sh/dl-machine.h ++++ b/sysdeps/sh/dl-machine.h +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -251,10 +253,6 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc, + + #endif /* !dl_machine_h */ + +-/* SH never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + #ifdef RESOLVE_MAP + + /* Perform the relocation specified by RELOC and SYM (which is fully resolved). +diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h +index 6361cfae9eb8fa58..78f53bc49920fa46 100644 +--- a/sysdeps/sparc/sparc32/dl-machine.h ++++ b/sysdeps/sparc/sparc32/dl-machine.h +@@ -28,6 +28,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int +@@ -196,10 +198,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT + +-/* The SPARC never uses Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Undo the sub %sp, 6*4, %sp; add %sp, 22*4, %o0 below to get at the + value we want in __libc_stack_end. */ + #define DL_STACK_END(cookie) \ +diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h +index 3fd18c6e5ef21e38..3fa79d038fd38976 100644 +--- a/sysdeps/sparc/sparc64/dl-machine.h ++++ b/sysdeps/sparc/sparc64/dl-machine.h +@@ -26,6 +26,8 @@ + #include + #include + #include ++#include ++#include + + #define ELF64_R_TYPE_ID(info) ((info) & 0xff) + #define ELF64_R_TYPE_DATA(info) ((info) >> 8) +@@ -118,10 +120,6 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + /* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ + #define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT + +-/* The SPARC never uses Elf64_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h b/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h +index db388a022d552b8c..72b75d3bebfed0b5 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h ++++ b/sysdeps/unix/sysv/linux/powerpc/libc-vdso.h +@@ -24,7 +24,7 @@ + #include + + #if (defined(__PPC64__) || defined(__powerpc64__)) && _CALL_ELF != 2 +-# include ++# include + /* The correct solution is for _dl_vdso_vsym to return the address of the OPD + for the kernel VDSO function. That address would then be stored in the + __vdso_* variables and returned as the result of the IFUNC resolver function. +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index 90c77cfea1de8d63..94296719d4d9fb82 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -27,6 +27,8 @@ + #include + #include + #include ++#include ++#include + + /* Return nonzero iff ELF header is compatible with the running host. */ + static inline int __attribute__ ((unused)) +@@ -208,10 +210,6 @@ _dl_start_user:\n\ + // XXX This is a work-around for a broken linker. Remove! + #define ELF_MACHINE_IRELATIVE R_X86_64_IRELATIVE + +-/* The x86-64 never uses Elf64_Rel/Elf32_Rel relocations. */ +-#define ELF_MACHINE_NO_REL 1 +-#define ELF_MACHINE_NO_RELA 0 +- + /* We define an initialization function. This is called very early in + _dl_sysdep_start. */ + #define DL_PLATFORM_INIT dl_platform_init () diff --git a/glibc-upstream-2.34-139.patch b/glibc-upstream-2.34-139.patch new file mode 100644 index 0000000..8633732 --- /dev/null +++ b/glibc-upstream-2.34-139.patch @@ -0,0 +1,221 @@ +commit f6a54a304223666ea4af73260c99c830d7726eca +Author: Adhemerval Zanella +Date: Fri Oct 15 14:35:31 2021 -0300 + + elf: Fix elf_get_dynamic_info() for bootstrap + + THe d6d89608ac8c broke powerpc for --enable-bind-now because it turned + out that different than patch assumption rtld elf_get_dynamic_info() + does require to handle RTLD_BOOTSTRAP to avoid DT_FLAGS and + DT_RUNPATH (more specially the GLRO usage which is not reallocate + yet). + + This patch fixes by passing two arguments to elf_get_dynamic_info() + to inform that by rtld (bootstrap) or static pie initialization + (static_pie_bootstrap). I think using explicit argument is way more + clear and burried C preprocessor, and compiler should remove the + dead code. + + I checked on x86_64 and i686 with default options, --enable-bind-now, + and --enable-bind-now and --enable--static-pie. I also check on + aarch64, armhf, powerpc64, and powerpc with default and + --enable-bind-now. + + (cherry picked from commit 5118dcac68c4eadfd6304bb33adde63d062dc07f) + + Resolved conflicts: + elf/rtld.c - Manual merge. + +diff --git a/elf/dl-load.c b/elf/dl-load.c +index fb3da5aa565908a6..a920b12a906a9dec 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1296,7 +1296,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + if (l->l_ld != 0) + l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr); + +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false, false); + + /* Make sure we are not dlopen'ing an object that has the + DF_1_NOOPEN flag set, or a PIE object. */ +diff --git a/elf/dl-reloc-static-pie.c b/elf/dl-reloc-static-pie.c +index ababafcf98f9945d..757205affe65d9e1 100644 +--- a/elf/dl-reloc-static-pie.c ++++ b/elf/dl-reloc-static-pie.c +@@ -25,7 +25,6 @@ + + #include + +-#define STATIC_PIE_BOOTSTRAP + #define RESOLVE_MAP(map, scope, sym, version, flags) map + #include "dynamic-link.h" + #include "get-dynamic-info.h" +@@ -52,7 +51,7 @@ _dl_relocate_static_pie (void) + break; + } + +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false, true); + + # ifdef ELF_MACHINE_BEFORE_RTLD_RELOC + ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info); +diff --git a/elf/get-dynamic-info.h b/elf/get-dynamic-info.h +index 1ac0663d1ff5de24..f63e07dc6d2cd5e6 100644 +--- a/elf/get-dynamic-info.h ++++ b/elf/get-dynamic-info.h +@@ -26,7 +26,8 @@ + #include + + static inline void __attribute__ ((unused, always_inline)) +-elf_get_dynamic_info (struct link_map *l) ++elf_get_dynamic_info (struct link_map *l, bool bootstrap, ++ bool static_pie_bootstrap) + { + #if __ELF_NATIVE_CLASS == 32 + typedef Elf32_Word d_tag_utype; +@@ -35,7 +36,7 @@ elf_get_dynamic_info (struct link_map *l) + #endif + + #ifndef STATIC_PIE_BOOTSTRAP +- if (l->l_ld == NULL) ++ if (!bootstrap && l->l_ld == NULL) + return; + #endif + +@@ -112,47 +113,63 @@ elf_get_dynamic_info (struct link_map *l) + if (info[DT_REL] != NULL) + assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel))); + #endif +-#ifdef STATIC_PIE_BOOTSTRAP +- assert (info[DT_RUNPATH] == NULL); +- assert (info[DT_RPATH] == NULL); +-#endif +- if (info[DT_FLAGS] != NULL) ++ if (bootstrap || static_pie_bootstrap) + { +- /* Flags are used. Translate to the old form where available. +- Since these l_info entries are only tested for NULL pointers it +- is ok if they point to the DT_FLAGS entry. */ +- l->l_flags = info[DT_FLAGS]->d_un.d_val; +- +- if (l->l_flags & DF_SYMBOLIC) +- info[DT_SYMBOLIC] = info[DT_FLAGS]; +- if (l->l_flags & DF_TEXTREL) +- info[DT_TEXTREL] = info[DT_FLAGS]; +- if (l->l_flags & DF_BIND_NOW) +- info[DT_BIND_NOW] = info[DT_FLAGS]; ++ assert (info[DT_RUNPATH] == NULL); ++ assert (info[DT_RPATH] == NULL); + } +- if (info[VERSYMIDX (DT_FLAGS_1)] != NULL) ++ if (bootstrap) + { +- l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val; +- if (l->l_flags_1 & DF_1_NODELETE) +- l->l_nodelete_pending = true; +- +- /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like +- to assert this, but we can't. Users have been setting +- unsupported DF_1_* flags for a long time and glibc has ignored +- them. Therefore to avoid breaking existing applications the +- best we can do is add a warning during debugging with the +- intent of notifying the user of the problem. */ +- if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0) +- && l->l_flags_1 & ~DT_1_SUPPORTED_MASK) +- _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x in DT_FLAGS_1.\n", +- l->l_flags_1 & ~DT_1_SUPPORTED_MASK); +- +- if (l->l_flags_1 & DF_1_NOW) +- info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)]; ++ /* Only the bind now flags are allowed. */ ++ assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL ++ || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0); ++ /* Flags must not be set for ld.so. */ ++ assert (info[DT_FLAGS] == NULL ++ || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0); + } +- if (info[DT_RUNPATH] != NULL) +- /* If both RUNPATH and RPATH are given, the latter is ignored. */ +- info[DT_RPATH] = NULL; ++ else ++ { ++ if (info[DT_FLAGS] != NULL) ++ { ++ /* Flags are used. Translate to the old form where available. ++ Since these l_info entries are only tested for NULL pointers it ++ is ok if they point to the DT_FLAGS entry. */ ++ l->l_flags = info[DT_FLAGS]->d_un.d_val; ++ ++ if (l->l_flags & DF_SYMBOLIC) ++ info[DT_SYMBOLIC] = info[DT_FLAGS]; ++ if (l->l_flags & DF_TEXTREL) ++ info[DT_TEXTREL] = info[DT_FLAGS]; ++ if (l->l_flags & DF_BIND_NOW) ++ info[DT_BIND_NOW] = info[DT_FLAGS]; ++ } ++ ++ if (info[VERSYMIDX (DT_FLAGS_1)] != NULL) ++ { ++ l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val; ++ if (l->l_flags_1 & DF_1_NODELETE) ++ l->l_nodelete_pending = true; ++ ++ /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like ++ to assert this, but we can't. Users have been setting ++ unsupported DF_1_* flags for a long time and glibc has ignored ++ them. Therefore to avoid breaking existing applications the ++ best we can do is add a warning during debugging with the ++ intent of notifying the user of the problem. */ ++ if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0) ++ && l->l_flags_1 & ~DT_1_SUPPORTED_MASK) ++ _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x " ++ "in DT_FLAGS_1.\n", ++ l->l_flags_1 & ~DT_1_SUPPORTED_MASK); ++ ++ if (l->l_flags_1 & DF_1_NOW) ++ info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)]; ++ } ++ ++ if (info[DT_RUNPATH] != NULL) ++ /* If both RUNPATH and RPATH are given, the latter is ignored. */ ++ info[DT_RPATH] = NULL; ++ } + } + + #endif +diff --git a/elf/rtld.c b/elf/rtld.c +index 37d28d5a66d7b5d6..ad5ddb2a0ab94e7f 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -549,7 +549,7 @@ _dl_start (void *arg) + /* Read our own dynamic section and fill in the info array. */ + bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic (); + bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION; +- elf_get_dynamic_info (&bootstrap_map); ++ elf_get_dynamic_info (&bootstrap_map, true, false); + + #if NO_TLS_OFFSET != 0 + bootstrap_map.l_tls_offset = NO_TLS_OFFSET; +@@ -1653,7 +1653,7 @@ dl_main (const ElfW(Phdr) *phdr, + if (! rtld_is_main) + { + /* Extract the contents of the dynamic section for easy access. */ +- elf_get_dynamic_info (main_map); ++ elf_get_dynamic_info (main_map, false, false); + + /* If the main map is libc.so, update the base namespace to + refer to this map. If libc.so is loaded later, this happens +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index f44748bc9858e5fd..3f20578046de76ed 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -64,7 +64,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + l->l_map_end += l->l_addr; + l->l_text_end += l->l_addr; + l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr); +- elf_get_dynamic_info (l); ++ elf_get_dynamic_info (l, false, false); + _dl_setup_hash (l); + l->l_relocated = 1; + diff --git a/glibc-upstream-2.34-140.patch b/glibc-upstream-2.34-140.patch new file mode 100644 index 0000000..69ab10b --- /dev/null +++ b/glibc-upstream-2.34-140.patch @@ -0,0 +1,69 @@ +commit a31bbe3242266aaea423e5879f38aed69aea1d5e +Author: Adhemerval Zanella +Date: Thu Jul 29 11:13:57 2021 -0300 + + elf: Move LAV_CURRENT to link_lavcurrent.h + + No functional change. + + (cherry picked from commit 54816ae98d57930b7c945f17485714a5574bfe47) + + Resolved conflicts: + elf/Makefile + +diff --git a/bits/link_lavcurrent.h b/bits/link_lavcurrent.h +new file mode 100644 +index 0000000000000000..44fbea1e8060997f +--- /dev/null ++++ b/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _LINK_H ++# error "Never include directly; use instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. */ ++#define LAV_CURRENT 1 +diff --git a/elf/Makefile b/elf/Makefile +index cd8725c76f4cfb48..7fa80946ff3aae42 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -24,6 +24,7 @@ include ../Makeconfig + headers = \ + bits/elfclass.h \ + bits/link.h \ ++ bits/link_lavcurrent.h \ + elf.h \ + link.h \ + # headers +diff --git a/elf/link.h b/elf/link.h +index ff3a85c847930b9b..21a351686b9bf7c8 100644 +--- a/elf/link.h ++++ b/elf/link.h +@@ -96,7 +96,7 @@ struct link_map + #ifdef __USE_GNU + + /* Version numbers for la_version handshake interface. */ +-#define LAV_CURRENT 1 ++#include + + /* Activity types signaled through la_activity. */ + enum diff --git a/glibc-upstream-2.34-141.patch b/glibc-upstream-2.34-141.patch new file mode 100644 index 0000000..2856c96 --- /dev/null +++ b/glibc-upstream-2.34-141.patch @@ -0,0 +1,390 @@ +commit e25fe992132c460fecc1ab9fade185d5dd3f91ff +Author: Adhemerval Zanella +Date: Thu Nov 11 09:28:21 2021 -0300 + + elf: Move la_activity (LA_ACT_ADD) after _dl_add_to_namespace_list() (BZ #28062) + + It ensures that the the namespace is guaranteed to not be empty. + + Checked on x86_64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit ed3ce71f5c64c5f07cbde0ef03554ea8950d8f2c) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 7fa80946ff3aae42..bf6da98bdd15a18d 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -363,6 +363,7 @@ tests += \ + tst-audit15 \ + tst-audit16 \ + tst-audit17 \ ++ tst-audit18 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -623,6 +624,7 @@ modules-names = \ + tst-audit12mod2 \ + tst-audit12mod3 \ + tst-audit13mod1 \ ++ tst-audit18mod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -640,6 +642,7 @@ modules-names = \ + tst-auditmod9b \ + tst-auditmod11 \ + tst-auditmod12 \ ++ tst-auditmod18 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -1999,6 +2002,10 @@ $(objpfx)tst-auditmod17.so: $(objpfx)tst-auditmod17.os + CFLAGS-.os += $(call elide-stack-protector,.os,tst-auditmod17) + tst-audit17-ENV = LD_AUDIT=$(objpfx)tst-auditmod17.so + ++$(objpfx)tst-audit18.out: $(objpfx)tst-auditmod18.so \ ++ $(objpfx)tst-audit18mod.so ++tst-audit18-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a920b12a906a9dec..a8c6df3959f2b331 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1054,42 +1054,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + /* This is the ELF header. We read it in `open_verify'. */ + header = (void *) fbp->buf; + +- /* Signal that we are going to add new objects. */ +- if (r->r_state == RT_CONSISTENT) +- { +-#ifdef SHARED +- /* Auditing checkpoint: we are going to add new objects. */ +- if ((mode & __RTLD_AUDIT) == 0 +- && __glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } +- } +-#endif +- +- /* Notify the debugger we have added some objects. We need to +- call _dl_debug_initialize in a static program in case dynamic +- linking has not been used before. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); +- LIBC_PROBE (map_start, 2, nsid, r); +- make_consistent = true; +- } +- else +- assert (r->r_state == RT_ADD); +- + /* Enter the new object in the list of loaded objects. */ + l = _dl_new_object (realname, name, l_type, loader, mode, nsid); + if (__glibc_unlikely (l == NULL)) +@@ -1511,6 +1475,44 @@ cannot enable executable stack as shared object requires"); + /* Now that the object is fully initialized add it to the object list. */ + _dl_add_to_namespace_list (l, nsid); + ++ /* Signal that we are going to add new objects. */ ++ if (r->r_state == RT_CONSISTENT) ++ { ++#ifdef SHARED ++ /* Auditing checkpoint: we are going to add new objects. Since this ++ is called after _dl_add_to_namespace_list the namespace is guaranteed ++ to not be empty. */ ++ if ((mode & __RTLD_AUDIT) == 0 ++ && __glibc_unlikely (GLRO(dl_naudit) > 0)) ++ { ++ struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; ++ /* Do not call the functions for any auditing object. */ ++ if (head->l_auditing == 0) ++ { ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->activity != NULL) ++ afct->activity (&link_map_audit_state (head, cnt)->cookie, ++ LA_ACT_ADD); ++ ++ afct = afct->next; ++ } ++ } ++ } ++#endif ++ ++ /* Notify the debugger we have added some objects. We need to ++ call _dl_debug_initialize in a static program in case dynamic ++ linking has not been used before. */ ++ r->r_state = RT_ADD; ++ _dl_debug_state (); ++ LIBC_PROBE (map_start, 2, nsid, r); ++ make_consistent = true; ++ } ++ else ++ assert (r->r_state == RT_ADD); ++ + #ifdef SHARED + /* Auditing checkpoint: we have a new object. */ + if (__glibc_unlikely (GLRO(dl_naudit) > 0) +diff --git a/elf/tst-audit18.c b/elf/tst-audit18.c +new file mode 100644 +index 0000000000000000..ef784908f60d50aa +--- /dev/null ++++ b/elf/tst-audit18.c +@@ -0,0 +1,129 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static int ++handle_restart (void) ++{ ++ { ++ void *h = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ ++ pid_t (*s) (void) = xdlsym (h, "getpid"); ++ TEST_COMPARE (s (), getpid ()); ++ ++ xdlclose (h); ++ } ++ ++ { ++ void *h = xdlmopen (LM_ID_NEWLM, "tst-audit18mod.so", RTLD_NOW); ++ ++ int (*foo) (void) = xdlsym (h, "foo"); ++ TEST_COMPARE (foo (), 10); ++ ++ xdlclose (h); ++ } ++ ++ return 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod18.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit18", 0, sc_allow_stderr); ++ ++ struct ++ { ++ const char *name; ++ bool found; ++ } audit_iface[] = ++ { ++ { "la_version", false }, ++ { "la_objsearch", false }, ++ { "la_activity", false }, ++ { "la_objopen", false }, ++ { "la_objclose", false }, ++ { "la_preinit", false }, ++#if __WORDSIZE == 32 ++ { "la_symbind32", false }, ++#elif __WORDSIZE == 64 ++ { "la_symbind64", false }, ++#endif ++ }; ++ ++ /* Some hooks are called more than once but the test only check if any ++ is called at least once. */ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ { ++ for (int i = 0; i < array_length (audit_iface); i++) ++ if (strncmp (buffer, audit_iface[i].name, ++ strlen (audit_iface[i].name)) == 0) ++ audit_iface[i].found = true; ++ } ++ free (buffer); ++ xfclose (out); ++ ++ for (int i = 0; i < array_length (audit_iface); i++) ++ TEST_COMPARE (audit_iface[i].found, true); ++ ++ support_capture_subprocess_free (&result); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit18mod.c b/elf/tst-audit18mod.c +new file mode 100644 +index 0000000000000000..096a9167c9f8353f +--- /dev/null ++++ b/elf/tst-audit18mod.c +@@ -0,0 +1,23 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int ++foo (void) ++{ ++ return 10; ++} +diff --git a/elf/tst-auditmod18.c b/elf/tst-auditmod18.c +new file mode 100644 +index 0000000000000000..182992e9fdb1620c +--- /dev/null ++++ b/elf/tst-auditmod18.c +@@ -0,0 +1,73 @@ ++/* Check DT_AUDIT with dlmopen. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return LAV_CURRENT; ++} ++ ++char * ++la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return (char *) name; ++} ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ fprintf (stderr, "%s\n", __func__); ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return LA_FLG_BINDTO | LA_FLG_BINDFROM; ++} ++ ++unsigned int ++la_objclose (uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return 0; ++} ++ ++void ++la_preinit (uintptr_t *cookie) ++{ ++ fprintf (stderr, "%s\n", __func__); ++} ++ ++uintptr_t ++#if __ELF_NATIVE_CLASS == 32 ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#else ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#endif ++{ ++ fprintf (stderr, "%s\n", __func__); ++ return sym->st_value; ++} diff --git a/glibc-upstream-2.34-142.patch b/glibc-upstream-2.34-142.patch new file mode 100644 index 0000000..20e72f1 --- /dev/null +++ b/glibc-upstream-2.34-142.patch @@ -0,0 +1,159 @@ +commit ce0cb6d1d2daac2d58006a41c3d19c551b86f255 +Author: Adhemerval Zanella +Date: Mon Jul 19 15:47:51 2021 -0300 + + elf: Add _dl_audit_objopen + + It consolidates the code required to call la_objopen audit callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit aee6e90f93e285016b6cd9c8bd00402c19ba271b) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index bf6da98bdd15a18d..85165c0591412a45 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -118,6 +118,7 @@ elide-routines.os = \ + # interpreter and operating independent of libc. + rtld-routines = \ + $(all-dl-routines) \ ++ dl-audit \ + dl-compat \ + dl-conflict \ + dl-diagnostics \ +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +new file mode 100644 +index 0000000000000000..4066dfe85146b9d4 +--- /dev/null ++++ b/elf/dl-audit.c +@@ -0,0 +1,39 @@ ++/* Audit common functions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++_dl_audit_objopen (struct link_map *l, Lmid_t nsid) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0)) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objopen != NULL) ++ { ++ struct auditstate *state = link_map_audit_state (l, cnt); ++ state->bindflags = afct->objopen (l, nsid, &state->cookie); ++ l->l_audit_any_plt |= state->bindflags != 0; ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a8c6df3959f2b331..a2d73d025c65cd79 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1515,22 +1515,8 @@ cannot enable executable stack as shared object requires"); + + #ifdef SHARED + /* Auditing checkpoint: we have a new object. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) +- && !GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objopen != NULL) +- { +- struct auditstate *state = link_map_audit_state (l, cnt); +- state->bindflags = afct->objopen (l, nsid, &state->cookie); +- l->l_audit_any_plt |= state->bindflags != 0; +- } +- +- afct = afct->next; +- } +- } ++ if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) ++ _dl_audit_objopen (l, nsid); + #endif + + return l; +diff --git a/elf/rtld.c b/elf/rtld.c +index ad5ddb2a0ab94e7f..45fec0df3043b90a 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1064,25 +1064,6 @@ ERROR: audit interface '%s' requires version %d (maximum supported version %d); + dlmargs.map->l_auditing = 1; + } + +-/* Notify the the audit modules that the object MAP has already been +- loaded. */ +-static void +-notify_audit_modules_of_loaded_object (struct link_map *map) +-{ +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objopen != NULL) +- { +- struct auditstate *state = link_map_audit_state (map, cnt); +- state->bindflags = afct->objopen (map, LM_ID_BASE, &state->cookie); +- map->l_audit_any_plt |= state->bindflags != 0; +- } +- +- afct = afct->next; +- } +-} +- + /* Load all audit modules. */ + static void + load_audit_modules (struct link_map *main_map, struct audit_list *audit_list) +@@ -1101,8 +1082,8 @@ load_audit_modules (struct link_map *main_map, struct audit_list *audit_list) + program and the dynamic linker itself). */ + if (GLRO(dl_naudit) > 0) + { +- notify_audit_modules_of_loaded_object (main_map); +- notify_audit_modules_of_loaded_object (&GL(dl_rtld_map)); ++ _dl_audit_objopen (main_map, LM_ID_BASE); ++ _dl_audit_objopen (&GL(dl_rtld_map), LM_ID_BASE); + } + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index bcf1f199c5985c65..5709e4e48dff4355 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1372,6 +1372,11 @@ link_map_audit_state (struct link_map *l, size_t index) + return &base[index]; + } + } ++ ++/* Call the la_objopen from the audit modules for the link_map L on the ++ namespace identification NSID. */ ++void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/glibc-upstream-2.34-143.patch b/glibc-upstream-2.34-143.patch new file mode 100644 index 0000000..d93c0cb --- /dev/null +++ b/glibc-upstream-2.34-143.patch @@ -0,0 +1,254 @@ +commit 66e9d27a090874ab93030a908eb86fc29f856151 +Author: Adhemerval Zanella +Date: Tue Jul 20 11:03:34 2021 -0300 + + elf: Add _dl_audit_activity_map and _dl_audit_activity_nsid + + It consolidates the code required to call la_activity audit + callback. + + Also for a new Lmid_t the namespace link_map list are empty, so it + requires to check if before using it. This can happen for when audit + module is used along with dlmopen. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 3dac3959a5cb585b065cef2cb8a8d909c907e202) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 4066dfe85146b9d4..74b87f4b39be75e1 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -18,6 +18,32 @@ + + #include + ++void ++_dl_audit_activity_map (struct link_map *l, int action) ++{ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->activity != NULL) ++ afct->activity (&link_map_audit_state (l, cnt)->cookie, action); ++ afct = afct->next; ++ } ++} ++ ++void ++_dl_audit_activity_nsid (Lmid_t nsid, int action) ++{ ++ /* If head is NULL, the namespace has become empty, and the audit interface ++ does not give us a way to signal LA_ACT_CONSISTENT for it because the ++ first loaded module is used to identify the namespace. */ ++ struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; ++ if (__glibc_likely (GLRO(dl_naudit) == 0) ++ || head == NULL || head->l_auditing) ++ return; ++ ++ _dl_audit_activity_map (head, action); ++} ++ + void + _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + { +diff --git a/elf/dl-close.c b/elf/dl-close.c +index f6fbf9de7d78555b..5a8cc9e7cb5186cc 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -472,25 +472,7 @@ _dl_close_worker (struct link_map *map, bool force) + + #ifdef SHARED + /* Auditing checkpoint: we will start deleting objects. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct link_map *head = ns->_ns_loaded; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_DELETE); +- } +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); + #endif + + /* Notify the debugger we are about to remove some loaded objects. */ +@@ -785,32 +767,9 @@ _dl_close_worker (struct link_map *map, bool force) + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + #ifdef SHARED +- /* Auditing checkpoint: we have deleted all objects. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct link_map *head = ns->_ns_loaded; +- /* If head is NULL, the namespace has become empty, and the +- audit interface does not give us a way to signal +- LA_ACT_CONSISTENT for it because the first loaded module is +- used to identify the namespace. +- +- Furthermore, do not notify auditors of the cleanup of a +- failed audit module loading attempt. */ +- if (head != NULL && head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_CONSISTENT); +- } +- +- afct = afct->next; +- } +- } +- } ++ /* Auditing checkpoint: we have deleted all objects. Also, do not notify ++ auditors of the cleanup of a failed audit module loading attempt. */ ++ _dl_audit_activity_nsid (nsid, LA_ACT_CONSISTENT); + #endif + + if (__builtin_expect (ns->_ns_loaded == NULL, 0) +diff --git a/elf/dl-load.c b/elf/dl-load.c +index a2d73d025c65cd79..baf0a926053deaed 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1482,24 +1482,8 @@ cannot enable executable stack as shared object requires"); + /* Auditing checkpoint: we are going to add new objects. Since this + is called after _dl_add_to_namespace_list the namespace is guaranteed + to not be empty. */ +- if ((mode & __RTLD_AUDIT) == 0 +- && __glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[nsid]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } +- } ++ if ((mode & __RTLD_AUDIT) == 0) ++ _dl_audit_activity_nsid (nsid, LA_ACT_ADD); + #endif + + /* Notify the debugger we have added some objects. We need to +diff --git a/elf/dl-open.c b/elf/dl-open.c +index bc68e2c376debd71..3f01aa480730da13 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -611,25 +611,7 @@ dl_open_worker_begin (void *a) + + #ifdef SHARED + /* Auditing checkpoint: we have added all objects. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[new->l_ns]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- { +- struct auditstate *state = link_map_audit_state (head, cnt); +- afct->activity (&state->cookie, LA_ACT_CONSISTENT); +- } +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); + #endif + + /* Notify the debugger all new objects are now ready to go. */ +diff --git a/elf/rtld.c b/elf/rtld.c +index 45fec0df3043b90a..b6bb46ca97b7972f 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1804,18 +1804,7 @@ dl_main (const ElfW(Phdr) *phdr, + + /* Auditing checkpoint: we are ready to signal that the initial map + is being constructed. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (main_map, cnt)->cookie, +- LA_ACT_ADD); +- +- afct = afct->next; +- } +- } ++ _dl_audit_activity_map (main_map, LA_ACT_ADD); + + /* We have two ways to specify objects to preload: via environment + variable and via the file /etc/ld.so.preload. The latter can also +@@ -2496,23 +2485,7 @@ dl_main (const ElfW(Phdr) *phdr, + + #ifdef SHARED + /* Auditing checkpoint: we have added all objects. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- /* Do not call the functions for any auditing object. */ +- if (head->l_auditing == 0) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->activity != NULL) +- afct->activity (&link_map_audit_state (head, cnt)->cookie, +- LA_ACT_CONSISTENT); +- +- afct = afct->next; +- } +- } +- } ++ _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); + #endif + + /* Notify the debugger all new objects are now ready to go. We must re-get +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 5709e4e48dff4355..7384abcf5e0e8e24 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1373,6 +1373,16 @@ link_map_audit_state (struct link_map *l, size_t index) + } + } + ++/* Call the la_activity from the audit modules from the link map L and issues ++ the ACTION argument. */ ++void _dl_audit_activity_map (struct link_map *l, int action) ++ attribute_hidden; ++ ++/* Call the la_activity from the audit modules from the link map from the ++ namespace NSID and issues the ACTION argument. */ ++void _dl_audit_activity_nsid (Lmid_t nsid, int action) ++ attribute_hidden; ++ + /* Call the la_objopen from the audit modules for the link_map L on the + namespace identification NSID. */ + void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) diff --git a/glibc-upstream-2.34-144.patch b/glibc-upstream-2.34-144.patch new file mode 100644 index 0000000..62ab51c --- /dev/null +++ b/glibc-upstream-2.34-144.patch @@ -0,0 +1,157 @@ +commit ec0fc2a15358dc5f7191f9994f04b1385d14377d +Author: Adhemerval Zanella +Date: Tue Jul 20 13:47:36 2021 -0300 + + elf: Add _dl_audit_objsearch + + It consolidates the code required to call la_objsearch audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit c91008d3490e4e3ce29520068405f081f0d368ca) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 74b87f4b39be75e1..5682427220569d90 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -44,6 +44,28 @@ _dl_audit_activity_nsid (Lmid_t nsid, int action) + _dl_audit_activity_map (head, action); + } + ++const char * ++_dl_audit_objsearch (const char *name, struct link_map *l, unsigned int code) ++{ ++ if (l == NULL || l->l_auditing || code == 0) ++ return name; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objsearch != NULL) ++ { ++ struct auditstate *state = link_map_audit_state (l, cnt); ++ name = afct->objsearch (name, &state->cookie, code); ++ if (name == NULL) ++ return NULL; ++ } ++ afct = afct->next; ++ } ++ ++ return name; ++} ++ + void + _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + { +diff --git a/elf/dl-load.c b/elf/dl-load.c +index baf0a926053deaed..eb6b658b698f5694 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -1596,32 +1596,20 @@ open_verify (const char *name, int fd, + + #ifdef SHARED + /* Give the auditing libraries a chance. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) && whatcode != 0 +- && loader->l_auditing == 0) ++ if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { + const char *original_name = name; +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objsearch != NULL) +- { +- struct auditstate *state = link_map_audit_state (loader, cnt); +- name = afct->objsearch (name, &state->cookie, whatcode); +- if (name == NULL) +- /* Ignore the path. */ +- return -1; +- } +- +- afct = afct->next; +- } ++ name = _dl_audit_objsearch (name, loader, whatcode); ++ if (name == NULL) ++ return -1; + + if (fd != -1 && name != original_name && strcmp (name, original_name)) +- { +- /* An audit library changed what we're supposed to open, +- so FD no longer matches it. */ +- __close_nocancel (fd); +- fd = -1; +- } ++ { ++ /* An audit library changed what we're supposed to open, ++ so FD no longer matches it. */ ++ __close_nocancel (fd); ++ fd = -1; ++ } + } + #endif + +@@ -2060,36 +2048,17 @@ _dl_map_object (struct link_map *loader, const char *name, + #ifdef SHARED + /* Give the auditing libraries a chance to change the name before we + try anything. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0) +- && (loader == NULL || loader->l_auditing == 0)) ++ if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ const char *before = name; ++ name = _dl_audit_objsearch (name, loader, LA_SER_ORIG); ++ if (name == NULL) + { +- if (afct->objsearch != NULL) +- { +- const char *before = name; +- struct auditstate *state = link_map_audit_state (loader, cnt); +- name = afct->objsearch (name, &state->cookie, LA_SER_ORIG); +- if (name == NULL) +- { +- /* Do not try anything further. */ +- fd = -1; +- goto no_file; +- } +- if (before != name && strcmp (before, name) != 0) +- { +- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES)) +- _dl_debug_printf ("audit changed filename %s -> %s\n", +- before, name); +- +- if (origname == NULL) +- origname = before; +- } +- } +- +- afct = afct->next; ++ fd = -1; ++ goto no_file; + } ++ if (before != name && strcmp (before, name) != 0) ++ origname = before; + } + #endif + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 7384abcf5e0e8e24..1f212a18d7bfc440 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1373,6 +1373,13 @@ link_map_audit_state (struct link_map *l, size_t index) + } + } + ++/* Call the la_objsearch from the audit modules from the link map L. If ++ ORIGNAME is non NULL, it is updated with the revious name prior calling ++ la_objsearch. */ ++const char *_dl_audit_objsearch (const char *name, struct link_map *l, ++ unsigned int code) ++ attribute_hidden; ++ + /* Call the la_activity from the audit modules from the link map L and issues + the ACTION argument. */ + void _dl_audit_activity_map (struct link_map *l, int action) diff --git a/glibc-upstream-2.34-145.patch b/glibc-upstream-2.34-145.patch new file mode 100644 index 0000000..f429360 --- /dev/null +++ b/glibc-upstream-2.34-145.patch @@ -0,0 +1,123 @@ +commit 198660741b23ec9defb19e22951d4a721de603c8 +Author: Adhemerval Zanella +Date: Tue Jul 20 14:04:51 2021 -0300 + + elf: Add _dl_audit_objclose + + It consolidates the code required to call la_objclose audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 311c9ee54ea963ff69bd3a2e6981c37e893b4c3e) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 5682427220569d90..cb1c3de93cba447b 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -85,3 +85,24 @@ _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + afct = afct->next; + } + } ++ ++void ++_dl_audit_objclose (struct link_map *l) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0) ++ || GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->objclose != NULL) ++ { ++ struct auditstate *state= link_map_audit_state (l, cnt); ++ /* Return value is ignored. */ ++ afct->objclose (&state->cookie); ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 5a8cc9e7cb5186cc..985cd4e2821436af 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -260,9 +260,6 @@ _dl_close_worker (struct link_map *map, bool force) + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); + + /* Call all termination functions at once. */ +-#ifdef SHARED +- bool do_audit = GLRO(dl_naudit) > 0 && !ns->_ns_loaded->l_auditing; +-#endif + bool unload_any = false; + bool scope_mem_left = false; + unsigned int unload_global = 0; +@@ -296,22 +293,7 @@ _dl_close_worker (struct link_map *map, bool force) + + #ifdef SHARED + /* Auditing checkpoint: we remove an object. */ +- if (__glibc_unlikely (do_audit)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objclose != NULL) +- { +- struct auditstate *state +- = link_map_audit_state (imap, cnt); +- /* Return value is ignored. */ +- (void) afct->objclose (&state->cookie); +- } +- +- afct = afct->next; +- } +- } ++ _dl_audit_objclose (imap); + #endif + + /* This object must not be used anymore. */ +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index c683884c355dfd52..b789cfb9f2ac6c85 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -146,21 +146,7 @@ _dl_fini (void) + + #ifdef SHARED + /* Auditing checkpoint: another object closed. */ +- if (!do_audit && __builtin_expect (GLRO(dl_naudit) > 0, 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->objclose != NULL) +- { +- struct auditstate *state +- = link_map_audit_state (l, cnt); +- /* Return value is ignored. */ +- (void) afct->objclose (&state->cookie); +- } +- afct = afct->next; +- } +- } ++ _dl_audit_objclose (l); + #endif + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 1f212a18d7bfc440..982f23c0287955fe 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1394,6 +1394,10 @@ void _dl_audit_activity_nsid (Lmid_t nsid, int action) + namespace identification NSID. */ + void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + attribute_hidden; ++ ++/* Call the la_objclose from the audit modules for the link_map L. */ ++void _dl_audit_objclose (struct link_map *l) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/glibc-upstream-2.34-146.patch b/glibc-upstream-2.34-146.patch new file mode 100644 index 0000000..4024ad2 --- /dev/null +++ b/glibc-upstream-2.34-146.patch @@ -0,0 +1,334 @@ +commit b2d99731b6d27c719a30b8ffa931e91c73a6bb4b +Author: Adhemerval Zanella +Date: Tue Jul 20 15:58:35 2021 -0300 + + elf: Add _dl_audit_symbind_alt and _dl_audit_symbind + + It consolidates the code required to call la_symbind{32,64} audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit cda4f265c65fb6c4ce38ca1cf0a7e527c5e77cd5) + +diff --git a/elf/Versions b/elf/Versions +index 2af210b8f771c950..164682eaeaa9a1da 100644 +--- a/elf/Versions ++++ b/elf/Versions +@@ -58,6 +58,7 @@ ld { + _dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info; + _dl_deallocate_tls; _dl_make_stack_executable; + _dl_rtld_di_serinfo; _dl_starting_up; _dl_fatal_printf; ++ _dl_audit_symbind_alt; + _rtld_global; _rtld_global_ro; + + # Only here for gdb while a better method is developed. +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index cb1c3de93cba447b..a21530f30bc5524b 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + + void +@@ -106,3 +107,124 @@ _dl_audit_objclose (struct link_map *l) + afct = afct->next; + } + } ++ ++void ++_dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, void **value, ++ lookup_t result) ++{ ++ if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) ++ return; ++ ++ const char *strtab = (const char *) D_PTR (result, l_info[DT_STRTAB]); ++ /* Compute index of the symbol entry in the symbol table of the DSO with ++ the definition. */ ++ unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result, l_info[DT_SYMTAB])); ++ ++ unsigned int altvalue = 0; ++ /* Synthesize a symbol record where the st_value field is the result. */ ++ ElfW(Sym) sym = *ref; ++ sym.st_value = (ElfW(Addr)) *value; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ struct auditstate *match_audit = link_map_audit_state (l, cnt); ++ struct auditstate *result_audit = link_map_audit_state (result, cnt); ++ if (afct->symbind != NULL ++ && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0 ++ || ((result_audit->bindflags & LA_FLG_BINDTO) ++ != 0))) ++ { ++ unsigned int flags = altvalue | LA_SYMB_DLSYM; ++ uintptr_t new_value = afct->symbind (&sym, ndx, ++ &match_audit->cookie, ++ &result_audit->cookie, ++ &flags, strtab + ref->st_name); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ altvalue = LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ ++ afct = afct->next; ++ } ++ ++ *value = (void *) sym.st_value; ++ } ++} ++rtld_hidden_def (_dl_audit_symbind_alt) ++ ++void ++_dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, ++ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, ++ lookup_t result) ++{ ++ reloc_result->bound = result; ++ /* Compute index of the symbol entry in the symbol table of the DSO with the ++ definition. */ ++ reloc_result->boundndx = (defsym - (ElfW(Sym) *) D_PTR (result, ++ l_info[DT_SYMTAB])); ++ ++ if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) ++ { ++ /* Set all bits since this symbol binding is not interesting. */ ++ reloc_result->enterexit = (1u << DL_NNS) - 1; ++ return; ++ } ++ ++ /* Synthesize a symbol record where the st_value field is the result. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (*value); ++ ++ /* Keep track whether there is any interest in tracing the call in the lower ++ two bits. */ ++ assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); ++ assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); ++ reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; ++ ++ const char *strtab2 = (const void *) D_PTR (result, l_info[DT_STRTAB]); ++ ++ unsigned int flags = 0; ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ /* XXX Check whether both DSOs must request action or only one */ ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *result_state = link_map_audit_state (result, cnt); ++ if ((l_state->bindflags & LA_FLG_BINDFROM) != 0 ++ && (result_state->bindflags & LA_FLG_BINDTO) != 0) ++ { ++ if (afct->symbind != NULL) ++ { ++ uintptr_t new_value = afct->symbind (&sym, ++ reloc_result->boundndx, ++ &l_state->cookie, ++ &result_state->cookie, ++ &flags, ++ strtab2 + defsym->st_name); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ flags |= LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ } ++ ++ /* Remember the results for every audit library and store a summary ++ in the first two bits. */ ++ reloc_result->enterexit &= flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT); ++ reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT)) ++ << ((cnt + 1) * 2)); ++ } ++ else ++ /* If the bind flags say this auditor is not interested, set the bits ++ manually. */ ++ reloc_result->enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ++ << ((cnt + 1) * 2)); ++ afct = afct->next; ++ } ++ ++ reloc_result->flags = flags; ++ *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index 61c260ddb81b586c..c4413c9165cec8cb 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -297,84 +297,7 @@ _dl_profile_fixup ( + auditing libraries the possibility to change the value and + tell us whether further auditing is wanted. */ + if (defsym != NULL && GLRO(dl_naudit) > 0) +- { +- reloc_result->bound = result; +- /* Compute index of the symbol entry in the symbol table of +- the DSO with the definition. */ +- reloc_result->boundndx = (defsym +- - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); +- +- /* Determine whether any of the two participating DSOs is +- interested in auditing. */ +- if ((l->l_audit_any_plt | result->l_audit_any_plt) != 0) +- { +- unsigned int flags = 0; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Synthesize a symbol record where the st_value field is +- the result. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (value); +- +- /* Keep track whether there is any interest in tracing +- the call in the lower two bits. */ +- assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); +- assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); +- reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; +- +- const char *strtab2 = (const void *) D_PTR (result, +- l_info[DT_STRTAB]); +- +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- /* XXX Check whether both DSOs must request action or +- only one */ +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *result_state +- = link_map_audit_state (result, cnt); +- if ((l_state->bindflags & LA_FLG_BINDFROM) != 0 +- && (result_state->bindflags & LA_FLG_BINDTO) != 0) +- { +- if (afct->symbind != NULL) +- { +- uintptr_t new_value +- = afct->symbind (&sym, reloc_result->boundndx, +- &l_state->cookie, +- &result_state->cookie, +- &flags, +- strtab2 + defsym->st_name); +- if (new_value != (uintptr_t) sym.st_value) +- { +- flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- } +- +- /* Remember the results for every audit library and +- store a summary in the first two bits. */ +- reloc_result->enterexit +- &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT); +- reloc_result->enterexit +- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) +- << ((cnt + 1) * 2)); +- } +- else +- /* If the bind flags say this auditor is not interested, +- set the bits manually. */ +- reloc_result->enterexit +- |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) +- << ((cnt + 1) * 2)); +- +- afct = afct->next; +- } +- +- reloc_result->flags = flags; +- value = DL_FIXUP_ADDR_VALUE (sym.st_value); +- } +- else +- /* Set all bits since this symbol binding is not interesting. */ +- reloc_result->enterexit = (1u << DL_NNS) - 1; +- } ++ _dl_audit_symbind (l, reloc_result, defsym, &value, result); + #endif + + /* Store the result for later runs. */ +diff --git a/elf/dl-sym-post.h b/elf/dl-sym-post.h +index d68c2d2b1cd43c9b..a11095d3e8c3c937 100644 +--- a/elf/dl-sym-post.h ++++ b/elf/dl-sym-post.h +@@ -52,54 +52,9 @@ _dl_sym_post (lookup_t result, const ElfW(Sym) *ref, void *value, + tell us whether further auditing is wanted. */ + if (__glibc_unlikely (GLRO(dl_naudit) > 0)) + { +- const char *strtab = (const char *) D_PTR (result, +- l_info[DT_STRTAB]); +- /* Compute index of the symbol entry in the symbol table of +- the DSO with the definition. */ +- unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); +- + if (match == NULL) + match = _dl_sym_find_caller_link_map (caller); +- +- if ((match->l_audit_any_plt | result->l_audit_any_plt) != 0) +- { +- unsigned int altvalue = 0; +- struct audit_ifaces *afct = GLRO(dl_audit); +- /* Synthesize a symbol record where the st_value field is +- the result. */ +- ElfW(Sym) sym = *ref; +- sym.st_value = (ElfW(Addr)) value; +- +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- struct auditstate *match_audit +- = link_map_audit_state (match, cnt); +- struct auditstate *result_audit +- = link_map_audit_state (result, cnt); +- if (afct->symbind != NULL +- && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0 +- || ((result_audit->bindflags & LA_FLG_BINDTO) +- != 0))) +- { +- unsigned int flags = altvalue | LA_SYMB_DLSYM; +- uintptr_t new_value +- = afct->symbind (&sym, ndx, +- &match_audit->cookie, +- &result_audit->cookie, +- &flags, strtab + ref->st_name); +- if (new_value != (uintptr_t) sym.st_value) +- { +- altvalue = LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- } +- +- afct = afct->next; +- } +- +- value = (void *) sym.st_value; +- } ++ _dl_audit_symbind_alt (match, ref, &value, result); + } + #endif + return value; +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 982f23c0287955fe..61f1dfb3f79a613a 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1398,6 +1398,16 @@ void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + /* Call the la_objclose from the audit modules for the link_map L. */ + void _dl_audit_objclose (struct link_map *l) + attribute_hidden; ++ ++/* Call the la_symbind{32,64} from the audit modules for the link_map L. */ ++void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, ++ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, ++ lookup_t result) ++ attribute_hidden; ++/* Same as _dl_audit_symbind, but also sets LA_SYMB_DLSYM flag. */ ++void _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, ++ void **value, lookup_t result); ++rtld_hidden_proto (_dl_audit_symbind_alt) + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/glibc-upstream-2.34-147.patch b/glibc-upstream-2.34-147.patch new file mode 100644 index 0000000..5c93ad3 --- /dev/null +++ b/glibc-upstream-2.34-147.patch @@ -0,0 +1,107 @@ +commit 31473c273be14270f8eef68e35c03fd2305eb2c3 +Author: Adhemerval Zanella +Date: Thu Jul 22 17:10:57 2021 -0300 + + elf: Add _dl_audit_preinit + + It consolidates the code required to call la_preinit audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 0b98a8748759e88b58927882a8714109abe0a2d6) + +diff --git a/csu/libc-start.c b/csu/libc-start.c +index 0350b006fdcc22d2..d01e57ea59ceb880 100644 +--- a/csu/libc-start.c ++++ b/csu/libc-start.c +@@ -377,32 +377,15 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL), + /* This is a current program. Use the dynamic segment to find + constructors. */ + call_init (argc, argv, __environ); +-#else /* !SHARED */ +- call_init (argc, argv, __environ); +-#endif /* SHARED */ + +-#ifdef SHARED + /* Auditing checkpoint: we have a new object. */ +- if (__glibc_unlikely (GLRO(dl_naudit) > 0)) +- { +- struct audit_ifaces *afct = GLRO(dl_audit); +- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded; +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->preinit != NULL) +- afct->preinit (&link_map_audit_state (head, cnt)->cookie); +- +- afct = afct->next; +- } +- } +-#endif ++ _dl_audit_preinit (GL(dl_ns)[LM_ID_BASE]._ns_loaded); + +-#ifdef SHARED + if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS)) + GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]); +-#endif ++#else /* !SHARED */ ++ call_init (argc, argv, __environ); + +-#ifndef SHARED + _dl_debug_initialize (0, LM_ID_BASE); + #endif + +diff --git a/elf/Versions b/elf/Versions +index 164682eaeaa9a1da..bb6697647b397772 100644 +--- a/elf/Versions ++++ b/elf/Versions +@@ -58,7 +58,7 @@ ld { + _dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info; + _dl_deallocate_tls; _dl_make_stack_executable; + _dl_rtld_di_serinfo; _dl_starting_up; _dl_fatal_printf; +- _dl_audit_symbind_alt; ++ _dl_audit_symbind_alt; _dl_audit_preinit; + _rtld_global; _rtld_global_ro; + + # Only here for gdb while a better method is developed. +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index a21530f30bc5524b..0b6fac8e48877c93 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -108,6 +108,21 @@ _dl_audit_objclose (struct link_map *l) + } + } + ++void ++_dl_audit_preinit (struct link_map *l) ++{ ++ if (__glibc_likely (GLRO(dl_naudit) == 0)) ++ return; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->preinit != NULL) ++ afct->preinit (&link_map_audit_state (l, cnt)->cookie); ++ afct = afct->next; ++ } ++} ++ + void + _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, void **value, + lookup_t result) +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 61f1dfb3f79a613a..91193a036fc5c6ef 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1399,6 +1399,9 @@ void _dl_audit_objopen (struct link_map *l, Lmid_t nsid) + void _dl_audit_objclose (struct link_map *l) + attribute_hidden; + ++/* Call the la_preinit from the audit modules for the link_map L. */ ++void _dl_audit_preinit (struct link_map *l); ++ + /* Call the la_symbind{32,64} from the audit modules for the link_map L. */ + void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, diff --git a/glibc-upstream-2.34-148.patch b/glibc-upstream-2.34-148.patch new file mode 100644 index 0000000..11ffb9f --- /dev/null +++ b/glibc-upstream-2.34-148.patch @@ -0,0 +1,206 @@ +commit fd9c4e8a1b72fa1372855051217f9480680d882a +Author: Adhemerval Zanella +Date: Thu Jul 22 17:45:33 2021 -0300 + + elf: Add _dl_audit_pltenter + + It consolidates the code required to call la_pltenter audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit eff687e8462b0eaf65992a6031b54a4b1cd16796) + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 0b6fac8e48877c93..15250c67e8ac1658 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -17,7 +17,9 @@ + . */ + + #include ++#include + #include ++#include + + void + _dl_audit_activity_map (struct link_map *l, int action) +@@ -243,3 +245,78 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + reloc_result->flags = flags; + *value = DL_FIXUP_ADDR_VALUE (sym.st_value); + } ++ ++void ++_dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, ++ DL_FIXUP_VALUE_TYPE *value, void *regs, long int *framesize) ++{ ++ /* Don't do anything if no auditor wants to intercept this call. */ ++ if (GLRO(dl_naudit) == 0 ++ || (reloc_result->enterexit & LA_SYMB_NOPLTENTER)) ++ return; ++ ++ /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been ++ initialized earlier in this function or in another thread. */ ++ assert (DL_FIXUP_VALUE_CODE_ADDR (*value) != 0); ++ ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, ++ l_info[DT_SYMTAB]) ++ + reloc_result->boundndx); ++ ++ /* Set up the sym parameter. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (*value); ++ ++ /* Get the symbol name. */ ++ const char *strtab = (const void *) D_PTR (reloc_result->bound, ++ l_info[DT_STRTAB]); ++ const char *symname = strtab + sym.st_name; ++ ++ /* Keep track of overwritten addresses. */ ++ unsigned int flags = reloc_result->flags; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->ARCH_LA_PLTENTER != NULL ++ && (reloc_result->enterexit ++ & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0) ++ { ++ long int new_framesize = -1; ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *bound_state ++ = link_map_audit_state (reloc_result->bound, cnt); ++ uintptr_t new_value ++ = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx, ++ &l_state->cookie, &bound_state->cookie, ++ regs, &flags, symname, &new_framesize); ++ if (new_value != (uintptr_t) sym.st_value) ++ { ++ flags |= LA_SYMB_ALTVALUE; ++ sym.st_value = new_value; ++ } ++ ++ /* Remember the results for every audit library and store a summary ++ in the first two bits. */ ++ reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER ++ | LA_SYMB_NOPLTEXIT)) ++ << (2 * (cnt + 1))); ++ ++ if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT ++ << (2 * (cnt + 1)))) ++ == 0 && new_framesize != -1 && *framesize != -2) ++ { ++ /* If this is the first call providing information, use it. */ ++ if (*framesize == -1) ++ *framesize = new_framesize; ++ /* If two pltenter calls provide conflicting information, use ++ the larger value. */ ++ else if (new_framesize != *framesize) ++ *framesize = MAX (new_framesize, *framesize); ++ } ++ } ++ ++ afct = afct->next; ++ } ++ ++ *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index c4413c9165cec8cb..dfedeaf2dd1c7253 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -320,78 +320,7 @@ _dl_profile_fixup ( + #ifdef SHARED + /* Auditing checkpoint: report the PLT entering and allow the + auditors to change the value. */ +- if (GLRO(dl_naudit) > 0 +- /* Don't do anything if no auditor wants to intercept this call. */ +- && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0) +- { +- /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been +- initialized earlier in this function or in another thread. */ +- assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0); +- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, +- l_info[DT_SYMTAB]) +- + reloc_result->boundndx); +- +- /* Set up the sym parameter. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (value); +- +- /* Get the symbol name. */ +- const char *strtab = (const void *) D_PTR (reloc_result->bound, +- l_info[DT_STRTAB]); +- const char *symname = strtab + sym.st_name; +- +- /* Keep track of overwritten addresses. */ +- unsigned int flags = reloc_result->flags; +- +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->ARCH_LA_PLTENTER != NULL +- && (reloc_result->enterexit +- & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0) +- { +- long int new_framesize = -1; +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *bound_state +- = link_map_audit_state (reloc_result->bound, cnt); +- uintptr_t new_value +- = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx, +- &l_state->cookie, +- &bound_state->cookie, +- regs, &flags, symname, +- &new_framesize); +- if (new_value != (uintptr_t) sym.st_value) +- { +- flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; +- } +- +- /* Remember the results for every audit library and +- store a summary in the first two bits. */ +- reloc_result->enterexit +- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) +- << (2 * (cnt + 1))); +- +- if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT +- << (2 * (cnt + 1)))) +- == 0 && new_framesize != -1 && framesize != -2) +- { +- /* If this is the first call providing information, +- use it. */ +- if (framesize == -1) +- framesize = new_framesize; +- /* If two pltenter calls provide conflicting information, +- use the larger value. */ +- else if (new_framesize != framesize) +- framesize = MAX (new_framesize, framesize); +- } +- } +- +- afct = afct->next; +- } +- +- value = DL_FIXUP_ADDR_VALUE (sym.st_value); +- } ++ _dl_audit_pltenter (l, reloc_result, &value, regs, &framesize); + #endif + + /* Store the frame size information. */ +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 91193a036fc5c6ef..ea187dd266f14e06 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1411,6 +1411,10 @@ void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + void _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, + void **value, lookup_t result); + rtld_hidden_proto (_dl_audit_symbind_alt) ++void _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, ++ DL_FIXUP_VALUE_TYPE *value, void *regs, ++ long int *framesize) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED diff --git a/glibc-upstream-2.34-149.patch b/glibc-upstream-2.34-149.patch new file mode 100644 index 0000000..927d0e7 --- /dev/null +++ b/glibc-upstream-2.34-149.patch @@ -0,0 +1,715 @@ +commit a8e211daea6bdb505b10319ed3492e7d871c1e75 +Author: Adhemerval Zanella +Date: Thu Jul 22 18:02:42 2021 -0300 + + elf: Add _dl_audit_pltexit + + It consolidates the code required to call la_pltexit audit + callback. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 8c0664e2b861fd3789602cc0b0b1922b0e20cb3a) + + Resolved conflicts: + sysdeps/hppa/dl-runtime.c + +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 15250c67e8ac1658..152712b12fed6de2 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -20,6 +20,8 @@ + #include + #include + #include ++#include ++#include + + void + _dl_audit_activity_map (struct link_map *l, int action) +@@ -320,3 +322,48 @@ _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, + + *value = DL_FIXUP_ADDR_VALUE (sym.st_value); + } ++ ++void ++DL_ARCH_FIXUP_ATTRIBUTE ++_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg, ++ const void *inregs, void *outregs) ++{ ++ const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]); ++ ++ /* This is the address in the array where we store the result of previous ++ relocations. */ ++ // XXX Maybe the bound information must be stored on the stack since ++ // XXX with bind_not a new value could have been stored in the meantime. ++ struct reloc_result *reloc_result = ++ &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; ++ ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, ++ l_info[DT_SYMTAB]) ++ + reloc_result->boundndx); ++ ++ /* Set up the sym parameter. */ ++ ElfW(Sym) sym = *defsym; ++ sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr); ++ ++ /* Get the symbol name. */ ++ const char *strtab = (const void *) D_PTR (reloc_result->bound, ++ l_info[DT_STRTAB]); ++ const char *symname = strtab + sym.st_name; ++ ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ if (afct->ARCH_LA_PLTEXIT != NULL ++ && (reloc_result->enterexit ++ & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0) ++ { ++ struct auditstate *l_state = link_map_audit_state (l, cnt); ++ struct auditstate *bound_state ++ = link_map_audit_state (reloc_result->bound, cnt); ++ afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx, ++ &l_state->cookie, &bound_state->cookie, ++ inregs, outregs, symname); ++ } ++ ++ afct = afct->next; ++ } ++} +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index dfedeaf2dd1c7253..e42f6e8b8dfca08e 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -16,8 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#define IN_DL_RUNTIME 1 /* This can be tested in dl-machine.h. */ +- + #include + #include + #include +@@ -31,19 +29,6 @@ + #include + + +-#if (!ELF_MACHINE_NO_RELA && !defined ELF_MACHINE_PLT_REL) \ +- || ELF_MACHINE_NO_REL +-# define PLTREL ElfW(Rela) +-#else +-# define PLTREL ElfW(Rel) +-#endif +- +-/* The fixup functions might have need special attributes. If none +- are provided define the macro as empty. */ +-#ifndef ARCH_FIXUP_ATTRIBUTE +-# define ARCH_FIXUP_ATTRIBUTE +-#endif +- + /* This function is called through a special trampoline from the PLT the + first time each PLT entry is called. We must perform the relocation + specified in the PLT of the given shared object, and return the resolved +@@ -52,7 +37,7 @@ + function. */ + + DL_FIXUP_VALUE_TYPE +-attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++attribute_hidden __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE + _dl_fixup ( + # ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS + ELF_MACHINE_RUNTIME_FIXUP_ARGS, +@@ -148,7 +133,8 @@ _dl_fixup ( + + #ifndef PROF + DL_FIXUP_VALUE_TYPE +-__attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++__attribute ((noinline)) ++DL_ARCH_FIXUP_ATTRIBUTE + _dl_profile_fixup ( + #ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS + ELF_MACHINE_RUNTIME_FIXUP_ARGS, +@@ -332,52 +318,3 @@ _dl_profile_fixup ( + } + + #endif /* PROF */ +- +- +-#include +-void +-ARCH_FIXUP_ATTRIBUTE +-_dl_call_pltexit (struct link_map *l, ElfW(Word) reloc_arg, +- const void *inregs, void *outregs) +-{ +-#ifdef SHARED +- const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]); +- +- /* This is the address in the array where we store the result of previous +- relocations. */ +- // XXX Maybe the bound information must be stored on the stack since +- // XXX with bind_not a new value could have been stored in the meantime. +- struct reloc_result *reloc_result = +- &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; +- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, +- l_info[DT_SYMTAB]) +- + reloc_result->boundndx); +- +- /* Set up the sym parameter. */ +- ElfW(Sym) sym = *defsym; +- sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr); +- +- /* Get the symbol name. */ +- const char *strtab = (const void *) D_PTR (reloc_result->bound, +- l_info[DT_STRTAB]); +- const char *symname = strtab + sym.st_name; +- +- struct audit_ifaces *afct = GLRO(dl_audit); +- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) +- { +- if (afct->ARCH_LA_PLTEXIT != NULL +- && (reloc_result->enterexit +- & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0) +- { +- struct auditstate *l_state = link_map_audit_state (l, cnt); +- struct auditstate *bound_state +- = link_map_audit_state (reloc_result->bound, cnt); +- afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx, +- &l_state->cookie, &bound_state->cookie, +- inregs, outregs, symname); +- } +- +- afct = afct->next; +- } +-#endif +-} +diff --git a/elf/dl-support.c b/elf/dl-support.c +index c5ee5d33aa7e1d65..f29dc965f4d10648 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -437,3 +437,11 @@ _dl_get_dl_main_map (void) + return &_dl_main_map; + } + #endif ++ ++/* This is used by _dl_runtime_profile, not used on static code. */ ++void ++DL_ARCH_FIXUP_ATTRIBUTE ++_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg, ++ const void *inregs, void *outregs) ++{ ++} +diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S +index a7e9267c1c6a4863..9b352b1d0f7d62e7 100644 +--- a/sysdeps/aarch64/dl-trampoline.S ++++ b/sysdeps/aarch64/dl-trampoline.S +@@ -293,7 +293,7 @@ _dl_runtime_profile: + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] + add x2, x29, #OFFSET_RG + add x3, x29, #OFFSET_RV +- bl _dl_call_pltexit ++ bl _dl_audit_pltexit + + ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] + ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S +index 9dfce5b0839dc122..55380d48ad8536ee 100644 +--- a/sysdeps/alpha/dl-trampoline.S ++++ b/sysdeps/alpha/dl-trampoline.S +@@ -187,7 +187,7 @@ _dl_runtime_profile_new: + jsr $26, ($27), 0 + ldgp $29, 0($26) + +- /* Set up for call to _dl_call_pltexit. */ ++ /* Set up for call to _dl_audit_pltexit. */ + ldq $16, 16*8($15) + ldq $17, 17*8($15) + stq $0, 16*8($15) +@@ -196,7 +196,7 @@ _dl_runtime_profile_new: + lda $19, 16*8($15) + stt $f0, 18*8($15) + stt $f1, 19*8($15) +- bsr $26, _dl_call_pltexit !samegp ++ bsr $26, _dl_audit_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) +@@ -518,7 +518,7 @@ _dl_runtime_profile_old: + jsr $26, ($27), 0 + ldgp $29, 0($26) + +- /* Set up for call to _dl_call_pltexit. */ ++ /* Set up for call to _dl_audit_pltexit. */ + ldq $16, 48*8($15) + ldq $17, 49*8($15) + stq $0, 46*8($15) +@@ -527,7 +527,7 @@ _dl_runtime_profile_old: + lda $19, 46*8($15) + stt $f0, 48*8($15) + stt $f1, 49*8($15) +- bsr $26, _dl_call_pltexit !samegp ++ bsr $26, _dl_audit_pltexit !samegp + + mov $15, $30 + cfi_def_cfa_register (30) +diff --git a/sysdeps/arm/dl-machine-rel.h b/sysdeps/arm/dl-machine-rel.h +index bec114706cd027a4..a9ee25a6b1d381ac 100644 +--- a/sysdeps/arm/dl-machine-rel.h ++++ b/sysdeps/arm/dl-machine-rel.h +@@ -28,4 +28,6 @@ + Prelinked libraries may use Elf32_Rela though. */ + #define ELF_MACHINE_PLT_REL 1 + ++#define PLTREL ElfW(Rel) ++ + #endif +diff --git a/sysdeps/arm/dl-trampoline.S b/sysdeps/arm/dl-trampoline.S +index 70105308ca7df934..a2d322706db77981 100644 +--- a/sysdeps/arm/dl-trampoline.S ++++ b/sysdeps/arm/dl-trampoline.S +@@ -194,7 +194,7 @@ _dl_runtime_profile: + ldmia ip, {r0,r1} + add r2, r7, #72 + add r3, r7, #0 +- bl _dl_call_pltexit ++ bl _dl_audit_pltexit + + @ Return to caller. + ldmia r7, {r0-r3} +diff --git a/sysdeps/generic/dl-fixup-attribute.h b/sysdeps/generic/dl-fixup-attribute.h +new file mode 100644 +index 0000000000000000..aa92169b709b3fea +--- /dev/null ++++ b/sysdeps/generic/dl-fixup-attribute.h +@@ -0,0 +1,24 @@ ++/* ABI specifics for lazy resolution functions. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FIXUP_ATTRIBUTE_H ++#define _DL_FIXUP_ATTRIBUTE_H ++ ++#define DL_ARCH_FIXUP_ATTRIBUTE ++ ++#endif +diff --git a/sysdeps/generic/dl-machine-rel.h b/sysdeps/generic/dl-machine-rel.h +index 9167a1dffc715704..9d5b7bb749e69e63 100644 +--- a/sysdeps/generic/dl-machine-rel.h ++++ b/sysdeps/generic/dl-machine-rel.h +@@ -23,5 +23,7 @@ + #define ELF_MACHINE_NO_REL 1 + /* Defined if the architecture supports Elf{32,64}_Rela relocations. */ + #define ELF_MACHINE_NO_RELA 0 ++/* Used to calculate the index of link_map l_reloc_result. */ ++#define PLTREL ElfW(Rela) + + #endif +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index ea187dd266f14e06..686f0a7b9709eb10 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1415,6 +1416,11 @@ void _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result, + DL_FIXUP_VALUE_TYPE *value, void *regs, + long int *framesize) + attribute_hidden; ++void DL_ARCH_FIXUP_ATTRIBUTE _dl_audit_pltexit (struct link_map *l, ++ ElfW(Word) reloc_arg, ++ const void *inregs, ++ void *outregs) ++ attribute_hidden; + #endif /* SHARED */ + + #if PTHREAD_IN_LIBC && defined SHARED +diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c +index a71b5b2013abf723..8699171930f51489 100644 +--- a/sysdeps/hppa/dl-runtime.c ++++ b/sysdeps/hppa/dl-runtime.c +@@ -25,7 +25,7 @@ + return that to the caller. The caller will continue on to call + _dl_fixup with the relocation offset. */ + +-ElfW(Word) __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE ++ElfW(Word) __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE + _dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) + { + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; +diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S +index cb18ea7eabba41ed..c54879bae0148012 100644 +--- a/sysdeps/hppa/dl-trampoline.S ++++ b/sysdeps/hppa/dl-trampoline.S +@@ -300,7 +300,7 @@ L(cont): + ldw -4(%sp),%r1 + copy %r1, %sp + +- /* Arguments to _dl_call_pltexit */ ++ /* Arguments to _dl_audit_pltexit */ + ldw -116(%sp), %r26 /* (1) got[1] == struct link_map */ + ldw -120(%sp), %r25 /* (2) reloc offsets */ + ldo -56(%sp), %r24 /* (3) *La_hppa_regs */ +@@ -312,8 +312,8 @@ L(cont): + ldo -128(%sp), %r1 + fstd %fr4,0(%r1) + +- /* Call _dl_call_pltexit */ +- bl _dl_call_pltexit,%rp ++ /* Call _dl_audit_pltexit */ ++ bl _dl_audit_pltexit,%rp + nop + + /* Restore *La_hppa_retval */ +diff --git a/sysdeps/i386/dl-fixup-attribute.h b/sysdeps/i386/dl-fixup-attribute.h +new file mode 100644 +index 0000000000000000..c10e9936f4db7254 +--- /dev/null ++++ b/sysdeps/i386/dl-fixup-attribute.h +@@ -0,0 +1,30 @@ ++/* ABI specifics for lazy resolution functions. i386 version. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_FIXUP_ATTRIBUTE_H ++#define _DL_FIXUP_ATTRIBUTE_H ++ ++/* We cannot use this scheme for profiling because the _mcount call destroys ++ the passed register information. */ ++#ifndef PROF ++# define DL_ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused)) ++#else ++# define DL_ARCH_FIXUP_ATTRIBUTE ++#endif ++ ++#endif +diff --git a/sysdeps/i386/dl-machine-rel.h b/sysdeps/i386/dl-machine-rel.h +index 7ac46f78a69fbf98..bb3480d45415d761 100644 +--- a/sysdeps/i386/dl-machine-rel.h ++++ b/sysdeps/i386/dl-machine-rel.h +@@ -28,4 +28,6 @@ + Prelinked libraries may use Elf32_Rela though. */ + #define ELF_MACHINE_PLT_REL 1 + ++#define PLTREL ElfW(Rel) ++ + #endif +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index c55c9a3d64bed1f2..5483e903d81e85c6 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -122,29 +122,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + return lazy; + } + +-#ifdef IN_DL_RUNTIME +- +-# ifndef PROF +-/* We add a declaration of this function here so that in dl-runtime.c +- the ELF_MACHINE_RUNTIME_TRAMPOLINE macro really can pass the parameters +- in registers. +- +- We cannot use this scheme for profiling because the _mcount call +- destroys the passed register information. */ +-#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused)) +- +-extern ElfW(Addr) _dl_fixup (struct link_map *l, +- ElfW(Word) reloc_offset) +- ARCH_FIXUP_ATTRIBUTE; +-extern ElfW(Addr) _dl_profile_fixup (struct link_map *l, +- ElfW(Word) reloc_offset, +- ElfW(Addr) retaddr, void *regs, +- long int *framesizep) +- ARCH_FIXUP_ATTRIBUTE; +-# endif +- +-#endif +- + /* Mask identifying addresses reserved for the user program, + where the dynamic linker should not map anything. */ + #define ELF_MACHINE_USER_ADDRESS_MASK 0xf0000000UL +diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S +index b5ec0326df94f0fd..3a33051c52da9cde 100644 +--- a/sysdeps/i386/dl-trampoline.S ++++ b/sysdeps/i386/dl-trampoline.S +@@ -265,7 +265,7 @@ _dl_runtime_profile: + movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax + # PLT1 + movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx +- call _dl_call_pltexit ++ call _dl_audit_pltexit + movl LRV_EAX_OFFSET(%esp), %eax + movl LRV_EDX_OFFSET(%esp), %edx + fldt LRV_ST1_OFFSET(%esp) +diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S +index 3053405a3a21d62e..11e86932c75d5b6b 100644 +--- a/sysdeps/ia64/dl-trampoline.S ++++ b/sysdeps/ia64/dl-trampoline.S +@@ -133,7 +133,7 @@ END(_dl_runtime_resolve) + + + /* The fourth argument to _dl_profile_fixup and the third one to +- _dl_call_pltexit are a pointer to La_ia64_regs: ++ _dl_audit_pltexit are a pointer to La_ia64_regs: + + 8byte r8 + 8byte r9 +@@ -159,7 +159,7 @@ END(_dl_runtime_resolve) + 8byte sp + + The fifth argument to _dl_profile_fixup is a pointer to long int. +- The fourth argument to _dl_call_pltexit is a pointer to ++ The fourth argument to _dl_audit_pltexit is a pointer to + La_ia64_retval: + + 8byte r8 +@@ -261,7 +261,7 @@ ENTRY(_dl_runtime_profile) + } + { .mii + mov r18 = ar.unat /* save it in La_ia64_regs */ +- mov loc7 = out3 /* save it for _dl_call_pltexit */ ++ mov loc7 = out3 /* save it for _dl_audit_pltexit */ + mov loc5 = r11 /* preserve language specific register */ + } + { .mmi +@@ -272,7 +272,7 @@ ENTRY(_dl_runtime_profile) + } + { .mii + mov ar.unat = r17 /* restore it for function call */ +- mov loc8 = r16 /* save it for _dl_call_pltexit */ ++ mov loc8 = r16 /* save it for _dl_audit_pltexit */ + nop.i 0x0 + } + { .mmi +@@ -291,7 +291,7 @@ ENTRY(_dl_runtime_profile) + { .mmi + stf.spill [r2] = f14, 32 + stf.spill [r3] = f15, 24 +- mov loc9 = out1 /* save it for _dl_call_pltexit */ ++ mov loc9 = out1 /* save it for _dl_audit_pltexit */ + ;; + } + { .mmb +@@ -426,7 +426,7 @@ ENTRY(_dl_runtime_profile) + br.call.sptk.many b0 = b6 + } + { .mii +- /* Prepare stack for _dl_call_pltexit. Loc10 has the original ++ /* Prepare stack for _dl_audit_pltexit. Loc10 has the original + stack pointer. */ + adds r12 = -PLTEXIT_FRAME_SIZE, loc10 + adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10 +@@ -461,14 +461,14 @@ ENTRY(_dl_runtime_profile) + { .mmi + stf.spill [r2] = f12, 32 + stf.spill [r3] = f13, 32 +- /* We need to restore gp for _dl_call_pltexit. */ ++ /* We need to restore gp for _dl_audit_pltexit. */ + mov gp = loc11 + ;; + } + { .mmb + stf.spill [r2] = f14 + stf.spill [r3] = f15 +- br.call.sptk.many b0 = _dl_call_pltexit ++ br.call.sptk.many b0 = _dl_audit_pltexit + } + { .mmi + /* Load all the non-floating and floating return values. Skip +diff --git a/sysdeps/m68k/dl-trampoline.S b/sysdeps/m68k/dl-trampoline.S +index a51a5f7f573c6330..72bde664c31c4256 100644 +--- a/sysdeps/m68k/dl-trampoline.S ++++ b/sysdeps/m68k/dl-trampoline.S +@@ -202,7 +202,7 @@ _dl_runtime_profile: + cfi_adjust_cfa_offset (4) + move.l (32+FPSPACE)(%sp), -(%sp) + cfi_adjust_cfa_offset (4) +- jbsr _dl_call_pltexit ++ jbsr _dl_audit_pltexit + lea 16(%sp), %sp + cfi_adjust_cfa_offset (-16) + move.l (%sp)+, %d0 +diff --git a/sysdeps/mips/dl-machine-rel.h b/sysdeps/mips/dl-machine-rel.h +index ed396180412bc723..3d0dfec01f6b193e 100644 +--- a/sysdeps/mips/dl-machine-rel.h ++++ b/sysdeps/mips/dl-machine-rel.h +@@ -22,5 +22,6 @@ + #define ELF_MACHINE_PLT_REL 1 + #define ELF_MACHINE_NO_REL 0 + #define ELF_MACHINE_NO_RELA 0 ++#define PLTREL ElfW(Rel) + + #endif +diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S +index 61bd8571fcc93caa..97f0105ce780514e 100644 +--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S ++++ b/sysdeps/powerpc/powerpc64/dl-trampoline.S +@@ -197,7 +197,7 @@ END(_dl_runtime_resolve) + #ifndef PROF + ENTRY (_dl_profile_resolve, 4) + /* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we +- need to call _dl_call_pltexit. */ ++ need to call _dl_audit_pltexit. */ + std r31,-8(r1) + std r30,-16(r1) + /* We need to save the registers used to pass parameters, ie. r3 thru +@@ -452,7 +452,7 @@ L(restoreFXR2): + L(callpltexit): + addi r5,r1,INT_PARMS + addi r6,r1,INT_RTN +- bl JUMPTARGET(_dl_call_pltexit) ++ bl JUMPTARGET(_dl_audit_pltexit) + #ifndef SHARED + nop + #endif +diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h +index c224a2b92832af9b..9e4cd1055fe6ab20 100644 +--- a/sysdeps/s390/s390-32/dl-trampoline.h ++++ b/sysdeps/s390/s390-32/dl-trampoline.h +@@ -282,7 +282,7 @@ _dl_runtime_profile: + basr %r1,0 + 5: l %r14,7f-5b(%r1) + la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_32_retval * +- bas %r14,0(%r14,%r1) # call _dl_call_pltexit ++ bas %r14,0(%r14,%r1) # call _dl_audit_pltexit + + lr %r15,%r12 # remove stack frame + # undef FRAME_SIZE +@@ -301,7 +301,7 @@ _dl_runtime_profile: + br %r14 + + 6: .long _dl_profile_fixup - 0b +-7: .long _dl_call_pltexit - 5b ++7: .long _dl_audit_pltexit - 5b + cfi_endproc + .size _dl_runtime_profile, .-_dl_runtime_profile + # undef SIZEOF_STRUCT_LA_S390_32_REGS +diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h +index ae741a3bad5ec77e..6e5bad40459ec765 100644 +--- a/sysdeps/s390/s390-64/dl-trampoline.h ++++ b/sysdeps/s390/s390-64/dl-trampoline.h +@@ -284,7 +284,7 @@ _dl_runtime_profile: + lmg %r2,%r4,CFA_OFF+PLT1_OFF(%r12) # r2, r3: args saved by PLT + # r4: struct La_s390_64_regs * + la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_64_retval * +- brasl %r14,_dl_call_pltexit ++ brasl %r14,_dl_audit_pltexit + + lgr %r15,%r12 # remove stack frame + # undef FRAME_SIZE +diff --git a/sysdeps/sh/dl-trampoline.S b/sysdeps/sh/dl-trampoline.S +index 824ac84ba1830ce5..f9038cd10ed5286f 100644 +--- a/sysdeps/sh/dl-trampoline.S ++++ b/sysdeps/sh/dl-trampoline.S +@@ -423,8 +423,8 @@ _dl_runtime_profile: + .align 2 + #ifdef SHARED + 7: .long _GLOBAL_OFFSET_TABLE_ +-8: .long _dl_call_pltexit@GOTOFF ++8: .long _dl_audit_pltexit@GOTOFF + #else +-8: .long _dl_call_pltexit ++8: .long _dl_audit_pltexit + #endif + .size _dl_runtime_profile, .-_dl_runtime_profile +diff --git a/sysdeps/sparc/sparc32/dl-trampoline.S b/sysdeps/sparc/sparc32/dl-trampoline.S +index 426f90c99a7ed369..2f64809731c865a2 100644 +--- a/sysdeps/sparc/sparc32/dl-trampoline.S ++++ b/sysdeps/sparc/sparc32/dl-trampoline.S +@@ -127,7 +127,7 @@ _dl_profile_invoke: + mov %l5, %o0 + mov %l6, %o1 + add %sp, (11 * 8), %o2 +- call _dl_call_pltexit ++ call _dl_audit_pltexit + add %sp, ( 9 * 8), %o3 + + ldd [%sp + ( 9 * 8)], %i0 +diff --git a/sysdeps/sparc/sparc64/dl-trampoline.S b/sysdeps/sparc/sparc64/dl-trampoline.S +index 8d59fa67209cd8ab..86605e37acd929fd 100644 +--- a/sysdeps/sparc/sparc64/dl-trampoline.S ++++ b/sysdeps/sparc/sparc64/dl-trampoline.S +@@ -196,7 +196,7 @@ _dl_profile_invoke: + mov %l5, %o0 + mov %l6, %o1 + add %sp, STACK_BIAS + (24 * 8), %o2 +- call _dl_call_pltexit ++ call _dl_audit_pltexit + add %sp, STACK_BIAS + (16 * 8), %o3 + + ldx [%sp + STACK_BIAS + (16 * 8)], %i0 +diff --git a/sysdeps/x86_64/dl-runtime.h b/sysdeps/x86_64/dl-runtime.h +index 9c8d3977eee27069..19ba33ef30970c20 100644 +--- a/sysdeps/x86_64/dl-runtime.h ++++ b/sysdeps/x86_64/dl-runtime.h +@@ -18,7 +18,7 @@ + 02111-1307 USA. */ + + /* The ABI calls for the PLT stubs to pass the index of the relocation +- and not its offset. In _dl_profile_fixup and _dl_call_pltexit we ++ and not its offset. In _dl_profile_fixup and _dl_audit_pltexit we + also use the index. Therefore it is wasteful to compute the offset + in the trampoline just to reverse the operation immediately + afterwards. */ +diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h +index b9a12970cd6206ee..b5de7efff778559e 100644 +--- a/sysdeps/x86_64/dl-trampoline.h ++++ b/sysdeps/x86_64/dl-trampoline.h +@@ -388,7 +388,7 @@ _dl_runtime_profile: + jns 3f + + /* There's nothing in the frame size, so there +- will be no call to the _dl_call_pltexit. */ ++ will be no call to the _dl_audit_pltexit. */ + + /* Get back registers content. */ + movq LR_RCX_OFFSET(%rsp), %rcx +@@ -436,7 +436,7 @@ _dl_runtime_profile: + mov 24(%rbx), %RSP_LP # Drop the copied stack content + + /* Now we have to prepare the La_x86_64_retval structure for the +- _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now, ++ _dl_audit_pltexit. The La_x86_64_regs is being pointed by rsp now, + so we just need to allocate the sizeof(La_x86_64_retval) space on + the stack, since the alignment has already been taken care of. */ + # ifdef RESTORE_AVX +@@ -491,7 +491,7 @@ _dl_runtime_profile: + movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx. + movq 40(%rbx), %rsi # Copy args pushed by PLT in register. + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index +- call _dl_call_pltexit ++ call _dl_audit_pltexit + + /* Restore return registers. */ + movq LRV_RAX_OFFSET(%rsp), %rax diff --git a/glibc-upstream-2.34-150.patch b/glibc-upstream-2.34-150.patch new file mode 100644 index 0000000..40829fd --- /dev/null +++ b/glibc-upstream-2.34-150.patch @@ -0,0 +1,454 @@ +commit 29496b3103ff13aa3c1d8b62552a98f39da0fe59 +Author: Adhemerval Zanella +Date: Wed Jun 30 10:24:09 2021 -0300 + + elf: Avoid unnecessary slowdown from profiling with audit (BZ#15533) + + The rtld-audit interfaces introduces a slowdown due to enabling + profiling instrumentation (as if LD_AUDIT implied LD_PROFILE). + However, instrumenting is only necessary if one of audit libraries + provides PLT callbacks (la_pltenter or la_pltexit symbols). Otherwise, + the slowdown can be avoided. + + The following patch adjusts the logic that enables profiling to iterate + over all audit modules and check if any of those provides a PLT hook. + To keep la_symbind to work even without PLT callbacks, _dl_fixup now + calls the audit callback if the modules implements it. + + Co-authored-by: Alexander Monakov + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 063f9ba220f434c7f30dd65c4cff17c0c458a7cf) + + Resolved conflicts: + NEWS + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 85165c0591412a45..eab9d46b6165e6be 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -365,6 +365,7 @@ tests += \ + tst-audit16 \ + tst-audit17 \ + tst-audit18 \ ++ tst-audit19b \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -454,6 +455,7 @@ tests-internal += \ + neededtest2 \ + neededtest3 \ + neededtest4 \ ++ tst-audit19a \ + tst-create_format1 \ + tst-dl-hwcaps_split \ + tst-dlmopen2 \ +@@ -626,6 +628,7 @@ modules-names = \ + tst-audit12mod3 \ + tst-audit13mod1 \ + tst-audit18mod \ ++ tst-audit19bmod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -644,6 +647,8 @@ modules-names = \ + tst-auditmod11 \ + tst-auditmod12 \ + tst-auditmod18 \ ++ tst-auditmod19a \ ++ tst-auditmod19b \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2007,6 +2012,13 @@ $(objpfx)tst-audit18.out: $(objpfx)tst-auditmod18.so \ + $(objpfx)tst-audit18mod.so + tst-audit18-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit19a.out: $(objpfx)tst-auditmod19a.so ++tst-audit19a-ENV = LD_AUDIT=$(objpfx)tst-auditmod19a.so ++ ++$(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so ++$(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so ++tst-audit19b-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index 3447de7f3536cd70..5b69321bda1f2b27 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -205,12 +205,28 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int skip_ifunc = reloc_mode & __RTLD_NOIFUNC; + + #ifdef SHARED ++ bool consider_symbind = false; + /* If we are auditing, install the same handlers we need for profiling. */ + if ((reloc_mode & __RTLD_AUDIT) == 0) +- consider_profiling |= GLRO(dl_audit) != NULL; ++ { ++ struct audit_ifaces *afct = GLRO(dl_audit); ++ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) ++ { ++ /* Profiling is needed only if PLT hooks are provided. */ ++ if (afct->ARCH_LA_PLTENTER != NULL ++ || afct->ARCH_LA_PLTEXIT != NULL) ++ consider_profiling = 1; ++ if (afct->symbind != NULL) ++ consider_symbind = true; ++ ++ afct = afct->next; ++ } ++ } + #elif defined PROF + /* Never use dynamic linker profiling for gprof profiling code. */ + # define consider_profiling 0 ++#else ++# define consider_symbind 0 + #endif + + if (l->l_relocated) +@@ -272,7 +288,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); + + #ifndef PROF +- if (__glibc_unlikely (consider_profiling) ++ if ((consider_profiling || consider_symbind) + && l->l_info[DT_PLTRELSZ] != NULL) + { + /* Allocate the array which will contain the already found +diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c +index e42f6e8b8dfca08e..77a5cccdcbcb9293 100644 +--- a/elf/dl-runtime.c ++++ b/elf/dl-runtime.c +@@ -124,6 +124,37 @@ _dl_fixup ( + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0)) + value = elf_ifunc_invoke (DL_FIXUP_VALUE_ADDR (value)); + ++#ifdef SHARED ++ /* Auditing checkpoint: we have a new binding. Provide the auditing ++ libraries the possibility to change the value and tell us whether further ++ auditing is wanted. ++ The l_reloc_result is only allocated if there is an audit module which ++ provides a la_symbind. */ ++ if (l->l_reloc_result != NULL) ++ { ++ /* This is the address in the array where we store the result of previous ++ relocations. */ ++ struct reloc_result *reloc_result ++ = &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))]; ++ unsigned int init = atomic_load_acquire (&reloc_result->init); ++ if (init == 0) ++ { ++ _dl_audit_symbind (l, reloc_result, sym, &value, result); ++ ++ /* Store the result for later runs. */ ++ if (__glibc_likely (! GLRO(dl_bind_not))) ++ { ++ reloc_result->addr = value; ++ /* Guarantee all previous writes complete before init is ++ updated. See CONCURRENCY NOTES below. */ ++ atomic_store_release (&reloc_result->init, 1); ++ } ++ } ++ else ++ value = reloc_result->addr; ++ } ++#endif ++ + /* Finally, fix up the plt itself. */ + if (__glibc_unlikely (GLRO(dl_bind_not))) + return value; +diff --git a/elf/rtld.c b/elf/rtld.c +index b6bb46ca97b7972f..f632a767d7a269ef 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1016,13 +1016,7 @@ ERROR: audit interface '%s' requires version %d (maximum supported version %d); + "la_objsearch\0" + "la_objopen\0" + "la_preinit\0" +-#if __ELF_NATIVE_CLASS == 32 +- "la_symbind32\0" +-#elif __ELF_NATIVE_CLASS == 64 +- "la_symbind64\0" +-#else +-# error "__ELF_NATIVE_CLASS must be defined" +-#endif ++ LA_SYMBIND "\0" + #define STRING(s) __STRING (s) + "la_" STRING (ARCH_LA_PLTENTER) "\0" + "la_" STRING (ARCH_LA_PLTEXIT) "\0" +diff --git a/elf/tst-audit19a.c b/elf/tst-audit19a.c +new file mode 100644 +index 0000000000000000..035cde9351c2711b +--- /dev/null ++++ b/elf/tst-audit19a.c +@@ -0,0 +1,38 @@ ++/* Check if DT_AUDIT a module without la_plt{enter,exit} symbols does not incur ++ in profiling (BZ#15533). ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ void *h = xdlopen ("tst-auditmod19a.so", RTLD_NOW); ++ ++ struct link_map *lmap; ++ TEST_VERIFY_EXIT (dlinfo (h, RTLD_DI_LINKMAP, &lmap) == 0); ++ ++ /* The internal array is only allocated if profiling is enabled. */ ++ TEST_VERIFY (lmap->l_reloc_result == NULL); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit19b.c b/elf/tst-audit19b.c +new file mode 100644 +index 0000000000000000..da015734f24e0d79 +--- /dev/null ++++ b/elf/tst-audit19b.c +@@ -0,0 +1,94 @@ ++/* Check if DT_AUDIT a module with la_plt{enter,exit} call la_symbind ++ for lazy resolution. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++int tst_audit18bmod1_func (void); ++ ++static int ++handle_restart (void) ++{ ++ TEST_COMPARE (tst_audit18bmod1_func (), 10); ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod18b.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit18b", 0, sc_allow_stderr); ++ ++ bool find_symbind = false; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ if (startswith (buffer, "la_symbind: tst_audit18bmod1_func") == 0) ++ find_symbind = true; ++ ++ TEST_COMPARE (find_symbind, true); ++ ++ free (buffer); ++ xfclose (out); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit19bmod.c b/elf/tst-audit19bmod.c +new file mode 100644 +index 0000000000000000..9ffdcd8f3ffbc38e +--- /dev/null ++++ b/elf/tst-audit19bmod.c +@@ -0,0 +1,23 @@ ++/* Extra module for tst-audit18b. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int ++tst_audit18bmod1_func (void) ++{ ++ return 10; ++} +diff --git a/elf/tst-auditmod19a.c b/elf/tst-auditmod19a.c +new file mode 100644 +index 0000000000000000..f58204099457743d +--- /dev/null ++++ b/elf/tst-auditmod19a.c +@@ -0,0 +1,25 @@ ++/* Audit module for tst-audit18a. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} +diff --git a/elf/tst-auditmod19b.c b/elf/tst-auditmod19b.c +new file mode 100644 +index 0000000000000000..e2248b2a75946746 +--- /dev/null ++++ b/elf/tst-auditmod19b.c +@@ -0,0 +1,46 @@ ++/* Audit module for tst-audit18b. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ return LA_FLG_BINDTO | LA_FLG_BINDFROM; ++} ++ ++uintptr_t ++#if __ELF_NATIVE_CLASS == 32 ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#else ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, unsigned int *flags, const char *symname) ++#endif ++{ ++ fprintf (stderr, "la_symbind: %s\n", symname); ++ return sym->st_value; ++} +diff --git a/include/link.h b/include/link.h +index 4dcf01d8aea90bc2..b3f160c278222b3c 100644 +--- a/include/link.h ++++ b/include/link.h +@@ -363,8 +363,10 @@ struct auditstate + + #if __ELF_NATIVE_CLASS == 32 + # define symbind symbind32 ++# define LA_SYMBIND "la_symbind32" + #elif __ELF_NATIVE_CLASS == 64 + # define symbind symbind64 ++# define LA_SYMBIND "la_symbind64" + #else + # error "__ELF_NATIVE_CLASS must be defined" + #endif diff --git a/glibc-upstream-2.34-151.patch b/glibc-upstream-2.34-151.patch new file mode 100644 index 0000000..21e804b --- /dev/null +++ b/glibc-upstream-2.34-151.patch @@ -0,0 +1,294 @@ +commit 02c6a3d35316d360ae08623f617b1873d2f6159a +Author: Adhemerval Zanella +Date: Wed Jun 30 15:51:31 2021 -0300 + + elf: Add audit tests for modules with TLSDESC + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit d1b38173c9255b1a4ae00018ad9b35404a7c74d0) + +diff --git a/elf/Makefile b/elf/Makefile +index eab9d46b6165e6be..29f545d2272bf6e2 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -807,6 +807,22 @@ modules-names += tst-gnu2-tls1mod + $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so + tst-gnu2-tls1mod.so-no-z-defs = yes + CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2 ++ ++tests += tst-audit-tlsdesc tst-audit-tlsdesc-dlopen ++modules-names += tst-audit-tlsdesc-mod1 tst-audit-tlsdesc-mod2 tst-auditmod-tlsdesc ++$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \ ++ $(objpfx)tst-audit-tlsdesc-mod2.so \ ++ $(shared-thread-library) ++CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2 ++CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2 ++$(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library) ++$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \ ++ $(objpfx)tst-audit-tlsdesc-mod2.so ++$(objpfx)tst-audit-tlsdesc-mod1.so: $(objpfx)tst-audit-tlsdesc-mod2.so ++$(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so ++tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so ++$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so ++tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so + endif + ifeq (yes,$(have-protected-data)) + modules-names += tst-protected1moda tst-protected1modb +diff --git a/elf/tst-audit-tlsdesc-dlopen.c b/elf/tst-audit-tlsdesc-dlopen.c +new file mode 100644 +index 0000000000000000..9c16bb087aca1b77 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-dlopen.c +@@ -0,0 +1,67 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static void * ++thr_func (void *mod) ++{ ++ int* (*get_global1)(void) = xdlsym (mod, "get_global1"); ++ int* (*get_global2)(void) = xdlsym (mod, "get_global2"); ++ void (*set_global2)(int) = xdlsym (mod, "set_global2"); ++ int* (*get_local1)(void) = xdlsym (mod, "get_local1"); ++ int* (*get_local2)(void) = xdlsym (mod, "get_local2"); ++ ++ int *global1 = get_global1 (); ++ TEST_COMPARE (*global1, 0); ++ ++*global1; ++ ++ int *global2 = get_global2 (); ++ TEST_COMPARE (*global2, 0); ++ ++*global2; ++ TEST_COMPARE (*global2, 1); ++ ++ set_global2 (10); ++ TEST_COMPARE (*global2, 10); ++ ++ int *local1 = get_local1 (); ++ TEST_COMPARE (*local1, 0); ++ ++*local1; ++ ++ int *local2 = get_local2 (); ++ TEST_COMPARE (*local2, 0); ++ ++*local2; ++ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ void *mod = xdlopen ("tst-audit-tlsdesc-mod1.so", RTLD_LAZY); ++ ++ pthread_t thr = xpthread_create (NULL, thr_func, mod); ++ void *r = xpthread_join (thr); ++ TEST_VERIFY (r == NULL); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit-tlsdesc-mod1.c b/elf/tst-audit-tlsdesc-mod1.c +new file mode 100644 +index 0000000000000000..61c7dd99a2fb5e28 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-mod1.c +@@ -0,0 +1,41 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++__thread int global1; ++ ++int * ++get_global1 (void) ++{ ++ return &global1; ++} ++ ++static __thread int local1; ++ ++void * ++get_local1 (void) ++{ ++ return &local1; ++} ++ ++extern __thread int global2; ++ ++void ++set_global2 (int v) ++{ ++ global2 = v; ++} +diff --git a/elf/tst-audit-tlsdesc-mod2.c b/elf/tst-audit-tlsdesc-mod2.c +new file mode 100644 +index 0000000000000000..28aef635f688ee03 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc-mod2.c +@@ -0,0 +1,33 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++__thread int global2; ++ ++int * ++get_global2 (void) ++{ ++ return &global2; ++} ++ ++static __thread int local2; ++ ++void * ++get_local2 (void) ++{ ++ return &local2; ++} +diff --git a/elf/tst-audit-tlsdesc.c b/elf/tst-audit-tlsdesc.c +new file mode 100644 +index 0000000000000000..3c8be81c95528f47 +--- /dev/null ++++ b/elf/tst-audit-tlsdesc.c +@@ -0,0 +1,60 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++extern __thread int global1; ++extern __thread int global2; ++void *get_local1 (void); ++void set_global2 (int v); ++void *get_local2 (void); ++ ++static void * ++thr_func (void *clousure) ++{ ++ TEST_COMPARE (global1, 0); ++ ++global1; ++ TEST_COMPARE (global2, 0); ++ ++global2; ++ TEST_COMPARE (global2, 1); ++ ++ set_global2 (10); ++ TEST_COMPARE (global2, 10); ++ ++ int *local1 = get_local1 (); ++ TEST_COMPARE (*local1, 0); ++ ++*local1; ++ ++ int *local2 = get_local2 (); ++ TEST_COMPARE (*local2, 0); ++ ++*local2; ++ ++ return 0; ++} ++ ++static int ++do_test (void) ++{ ++ pthread_t thr = xpthread_create (NULL, thr_func, NULL); ++ void *r = xpthread_join (thr); ++ TEST_VERIFY (r == NULL); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-auditmod-tlsdesc.c b/elf/tst-auditmod-tlsdesc.c +new file mode 100644 +index 0000000000000000..e4b835d1f1fb6f73 +--- /dev/null ++++ b/elf/tst-auditmod-tlsdesc.c +@@ -0,0 +1,25 @@ ++/* DT_AUDIT with modules with TLSDESC. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} diff --git a/glibc-upstream-2.34-152.patch b/glibc-upstream-2.34-152.patch new file mode 100644 index 0000000..e2c8172 --- /dev/null +++ b/glibc-upstream-2.34-152.patch @@ -0,0 +1,314 @@ +commit d1b9bee29a1c4e0b80db53f228e22550c3604894 +Author: Adhemerval Zanella +Date: Mon Jul 19 18:42:26 2021 -0300 + + elf: Issue audit la_objopen for vDSO + + The vDSO is is listed in the link_map chain, but is never the subject of + an la_objopen call. A new internal flag __RTLD_VDSO is added that + acts as __RTLD_OPENEXEC to allocate the required 'struct auditstate' + extra space for the 'struct link_map'. + + The return value from the callback is currently ignored, since there + is no PLT call involved by glibc when using the vDSO, neither the vDSO + are exported directly. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit f0e23d34a7bdf6b90fba954ee741419171ac41b2) + + Resolved conflicts: + elf/Makefile + +diff --git a/elf/Makefile b/elf/Makefile +index 29f545d2272bf6e2..465442bf59fa9794 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -366,6 +366,7 @@ tests += \ + tst-audit17 \ + tst-audit18 \ + tst-audit19b \ ++ tst-audit22 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -649,6 +650,7 @@ modules-names = \ + tst-auditmod18 \ + tst-auditmod19a \ + tst-auditmod19b \ ++ tst-auditmod22 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2035,6 +2037,9 @@ $(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so + $(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so + tst-audit19b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so ++tst-audit22-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-object.c b/elf/dl-object.c +index 1875599eb274dc35..dee49a32d4fdf07d 100644 +--- a/elf/dl-object.c ++++ b/elf/dl-object.c +@@ -59,16 +59,19 @@ _dl_new_object (char *realname, const char *libname, int type, + { + #ifdef SHARED + unsigned int naudit; +- if (__glibc_unlikely ((mode & __RTLD_OPENEXEC) != 0)) ++ if (__glibc_unlikely ((mode & (__RTLD_OPENEXEC | __RTLD_VDSO)) != 0)) + { +- assert (type == lt_executable); +- assert (nsid == LM_ID_BASE); ++ if (mode & __RTLD_OPENEXEC) ++ { ++ assert (type == lt_executable); ++ assert (nsid == LM_ID_BASE); + +- /* Ignore the specified libname for the main executable. It is +- only known with an explicit loader invocation. */ +- libname = ""; ++ /* Ignore the specified libname for the main executable. It is ++ only known with an explicit loader invocation. */ ++ libname = ""; ++ } + +- /* We create the map for the executable before we know whether ++ /* We create the map for the executable and vDSO before we know whether + we have auditing libraries and if yes, how many. Assume the + worst. */ + naudit = DL_NNS; +diff --git a/elf/rtld.c b/elf/rtld.c +index f632a767d7a269ef..b089e5cf4740443e 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1922,6 +1922,12 @@ dl_main (const ElfW(Phdr) *phdr, + assert (i == npreloads); + } + ++#ifdef NEED_DL_SYSINFO_DSO ++ /* Now that the audit modules are opened, call la_objopen for the vDSO. */ ++ if (GLRO(dl_sysinfo_map) != NULL) ++ _dl_audit_objopen (GLRO(dl_sysinfo_map), LM_ID_BASE); ++#endif ++ + /* Load all the libraries specified by DT_NEEDED entries. If LD_PRELOAD + specified some libraries to load, these are inserted before the actual + dependencies in the executable's searchlist for symbol resolution. */ +diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h +index 3f20578046de76ed..2b013d974a377a83 100644 +--- a/elf/setup-vdso.h ++++ b/elf/setup-vdso.h +@@ -30,7 +30,7 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)), + We just want our data structures to describe it as if we had just + mapped and relocated it normally. */ + struct link_map *l = _dl_new_object ((char *) "", "", lt_library, NULL, +- 0, LM_ID_BASE); ++ __RTLD_VDSO, LM_ID_BASE); + if (__glibc_likely (l != NULL)) + { + l->l_phdr = ((const void *) GLRO(dl_sysinfo_dso) +diff --git a/elf/tst-audit22.c b/elf/tst-audit22.c +new file mode 100644 +index 0000000000000000..18fd22a760ddc3d8 +--- /dev/null ++++ b/elf/tst-audit22.c +@@ -0,0 +1,124 @@ ++/* Check DTAUDIT and vDSO interaction. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static uintptr_t vdso_addr; ++ ++static int ++handle_restart (void) ++{ ++ fprintf (stderr, "vdso: %p\n", (void*) vdso_addr); ++ return 0; ++} ++ ++static uintptr_t ++parse_address (const char *str) ++{ ++ void *r; ++ TEST_COMPARE (sscanf (str, "%p\n", &r), 1); ++ return (uintptr_t) r; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ vdso_addr = getauxval (AT_SYSINFO_EHDR); ++ if (vdso_addr == 0) ++ FAIL_UNSUPPORTED ("getauxval (AT_SYSINFO_EHDR) returned 0"); ++ ++ /* We must have either: ++ - One our fource parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod22.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); ++ ++ /* The respawned process should always print the vDSO address (otherwise it ++ will fails as unsupported). However, on some architectures the audit ++ module might see the vDSO with l_addr being 0, meaning a fixed mapping ++ (linux-gate.so). In this case we don't check its value against ++ AT_SYSINFO_EHDR one. */ ++ uintptr_t vdso_process = 0; ++ bool vdso_audit_found = false; ++ uintptr_t vdso_audit = 0; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ { ++ if (startswith (buffer, "vdso: ")) ++ vdso_process = parse_address (buffer + strlen ("vdso: ")); ++ else if (startswith (buffer, "vdso found: ")) ++ { ++ vdso_audit = parse_address (buffer + strlen ("vdso found: ")); ++ vdso_audit_found = true; ++ } ++ } ++ ++ TEST_COMPARE (vdso_audit_found, true); ++ if (vdso_audit != 0) ++ TEST_COMPARE (vdso_process, vdso_audit); ++ ++ free (buffer); ++ xfclose (out); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-auditmod22.c b/elf/tst-auditmod22.c +new file mode 100644 +index 0000000000000000..8e05ce8cbb215dd5 +--- /dev/null ++++ b/elf/tst-auditmod22.c +@@ -0,0 +1,51 @@ ++/* Check DTAUDIT and vDSO interaction. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ /* The linux-gate.so is placed at a fixed address, thus l_addr being 0, ++ and it might be the value reported as the AT_SYSINFO_EHDR. */ ++ if (map->l_addr == 0 && startswith (map->l_name, "linux-gate.so")) ++ fprintf (stderr, "vdso found: %p\n", NULL); ++ else if (map->l_addr == getauxval (AT_SYSINFO_EHDR)) ++ fprintf (stderr, "vdso found: %p\n", (void*) map->l_addr); ++ ++ return 0; ++} +diff --git a/include/dlfcn.h b/include/dlfcn.h +index a4c283728f94deb4..e73294b0af587913 100644 +--- a/include/dlfcn.h ++++ b/include/dlfcn.h +@@ -12,6 +12,8 @@ + #define __RTLD_AUDIT 0x08000000 + #define __RTLD_SECURE 0x04000000 /* Apply additional security checks. */ + #define __RTLD_NOIFUNC 0x02000000 /* Suppress calling ifunc functions. */ ++#define __RTLD_VDSO 0x01000000 /* Tell _dl_new_object the object is ++ system-loaded. */ + + #define __LM_ID_CALLER -2 + diff --git a/glibc-upstream-2.34-153.patch b/glibc-upstream-2.34-153.patch new file mode 100644 index 0000000..2dba7b1 --- /dev/null +++ b/glibc-upstream-2.34-153.patch @@ -0,0 +1,167 @@ +commit 2255621f0e2cd07f7a6147928ce644e13526ffb6 +Author: Adhemerval Zanella +Date: Wed Jun 30 17:33:57 2021 -0300 + + elf: Do not fail for failed dlmopen on audit modules (BZ #28061) + + The dl_main sets the LM_ID_BASE to RT_ADD just before starting to + add load new shared objects. The state is set to RT_CONSISTENT just + after all objects are loaded. + + However if a audit modules tries to dlmopen an inexistent module, + the _dl_open will assert that the namespace is in an inconsistent + state. + + This is different than dlopen, since first it will not use + LM_ID_BASE and second _dl_map_object_from_fd is the sole responsible + to set and reset the r_state value. + + So the assert on _dl_open can not really be seen if the state is + consistent, since _dt_main resets it. This patch removes the assert. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Florian Weimer + (cherry picked from commit 484e672ddabe0a919a692520e6ac8f2580866235) + + Resolved conflicts: + elf/Makefile + elf/dl-open.c + +diff --git a/elf/Makefile b/elf/Makefile +index 465442bf59fa9794..91b2269257523a64 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -366,6 +366,7 @@ tests += \ + tst-audit17 \ + tst-audit18 \ + tst-audit19b \ ++ tst-audit20 \ + tst-audit22 \ + tst-auditmany \ + tst-auxobj \ +@@ -650,6 +651,7 @@ modules-names = \ + tst-auditmod18 \ + tst-auditmod19a \ + tst-auditmod19b \ ++ tst-auditmod20 \ + tst-auditmod22 \ + tst-auxvalmod \ + tst-big-note-lib \ +@@ -2037,6 +2039,9 @@ $(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so + $(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so + tst-audit19b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit20.out: $(objpfx)tst-auditmod20.so ++tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so ++ + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 3f01aa480730da13..bc6872632880634e 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -914,8 +914,6 @@ no more namespaces available for dlmopen()")); + the flag here. */ + } + +- assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT); +- + /* Release the lock. */ + __rtld_lock_unlock_recursive (GL(dl_load_lock)); + +diff --git a/elf/tst-audit20.c b/elf/tst-audit20.c +new file mode 100644 +index 0000000000000000..6f39ccee865b012b +--- /dev/null ++++ b/elf/tst-audit20.c +@@ -0,0 +1,25 @@ ++/* Check dlopen failure on audit modules. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static int ++do_test (void) ++{ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-auditmod20.c b/elf/tst-auditmod20.c +new file mode 100644 +index 0000000000000000..c57e50ee4e88dd6b +--- /dev/null ++++ b/elf/tst-auditmod20.c +@@ -0,0 +1,57 @@ ++/* Check dlopen failure on audit modules. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return LAV_CURRENT; ++} ++ ++static void ++check (void) ++{ ++ { ++ void *mod = dlopen ("nonexistent.so", RTLD_NOW); ++ if (mod != NULL) ++ abort (); ++ } ++ ++ { ++ void *mod = dlmopen (LM_ID_BASE, "nonexistent.so", RTLD_NOW); ++ if (mod != NULL) ++ abort (); ++ } ++} ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ if (flag != LA_ACT_CONSISTENT) ++ return; ++ check (); ++} ++ ++void ++la_preinit (uintptr_t *cookie) ++{ ++ check (); ++} diff --git a/glibc-upstream-2.34-154.patch b/glibc-upstream-2.34-154.patch new file mode 100644 index 0000000..15b64cf --- /dev/null +++ b/glibc-upstream-2.34-154.patch @@ -0,0 +1,434 @@ +commit 98047ba95caf9ed596908ca73a22070c5e27597b +Author: Adhemerval Zanella +Date: Mon Jan 24 10:46:15 2022 -0300 + + elf: Add la_activity during application exit + + la_activity is not called during application exit, even though + la_objclose is. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 5fa11a2bc94c912c3b25860065086902674537ba) + +diff --git a/elf/Makefile b/elf/Makefile +index 91b2269257523a64..407aaeaeb8c84020 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -368,6 +368,7 @@ tests += \ + tst-audit19b \ + tst-audit20 \ + tst-audit22 \ ++ tst-audit23 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -631,6 +632,7 @@ modules-names = \ + tst-audit13mod1 \ + tst-audit18mod \ + tst-audit19bmod \ ++ tst-audit23mod \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -653,6 +655,7 @@ modules-names = \ + tst-auditmod19b \ + tst-auditmod20 \ + tst-auditmod22 \ ++ tst-auditmod23 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2045,6 +2048,10 @@ tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit23.out: $(objpfx)tst-auditmod23.so \ ++ $(objpfx)tst-audit23mod.so ++tst-audit23-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index b789cfb9f2ac6c85..fa876da0ffa1cf97 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -64,6 +64,10 @@ _dl_fini (void) + __rtld_lock_unlock_recursive (GL(dl_load_lock)); + else + { ++#ifdef SHARED ++ _dl_audit_activity_nsid (ns, LA_ACT_DELETE); ++#endif ++ + /* Now we can allocate an array to hold all the pointers and + copy the pointers in. */ + struct link_map *maps[nloaded]; +@@ -153,6 +157,10 @@ _dl_fini (void) + /* Correct the previous increment. */ + --l->l_direct_opencount; + } ++ ++#ifdef SHARED ++ _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT); ++#endif + } + } + +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +new file mode 100644 +index 0000000000000000..4904cf1340a97ee1 +--- /dev/null ++++ b/elf/tst-audit23.c +@@ -0,0 +1,239 @@ ++/* Check for expected la_objopen and la_objeclose for all objects. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++static int ++handle_restart (void) ++{ ++ xdlopen ("tst-audit23mod.so", RTLD_NOW); ++ xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static inline bool ++is_vdso (const char *str) ++{ ++ return startswith (str, "linux-gate") ++ || startswith (str, "linux-vdso"); ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ if (restart) ++ return handle_restart (); ++ ++ char *spargv[9]; ++ TEST_VERIFY_EXIT (((argc - 1) + 3) < array_length (spargv)); ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ setenv ("LD_AUDIT", "tst-auditmod23.so", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); ++ ++ /* The expected la_objopen/la_objclose: ++ 1. executable ++ 2. loader ++ 3. libc.so ++ 4. tst-audit23mod.so ++ 5. libc.so (LM_ID_NEWLM). ++ 6. vdso (optional and ignored). */ ++ enum { max_objs = 6 }; ++ struct la_obj_t ++ { ++ char *lname; ++ uintptr_t laddr; ++ Lmid_t lmid; ++ bool closed; ++ } objs[max_objs] = { [0 ... max_objs-1] = { .closed = false } }; ++ size_t nobjs = 0; ++ ++ /* The expected namespaces are one for the audit module, one for the ++ application, and another for the dlmopen on handle_restart. */ ++ enum { max_ns = 3 }; ++ uintptr_t acts[max_ns] = { 0 }; ++ size_t nacts = 0; ++ int last_act = -1; ++ uintptr_t last_act_cookie = -1; ++ bool seen_first_objclose = false; ++ ++ FILE *out = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (out != NULL); ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ while (xgetline (&buffer, &buffer_length, out)) ++ { ++ if (startswith (buffer, "la_activity: ")) ++ { ++ uintptr_t cookie; ++ int this_act; ++ int r = sscanf (buffer, "la_activity: %d %"SCNxPTR"", &this_act, ++ &cookie); ++ TEST_COMPARE (r, 2); ++ ++ /* The cookie identifies the object at the head of the link map, ++ so we only add a new namespace if it changes from the previous ++ one. This works since dlmopen is the last in the test body. */ ++ if (cookie != last_act_cookie && last_act_cookie != -1) ++ TEST_COMPARE (last_act, LA_ACT_CONSISTENT); ++ ++ if (this_act == LA_ACT_ADD && acts[nacts] != cookie) ++ { ++ acts[nacts++] = cookie; ++ last_act_cookie = cookie; ++ } ++ /* The LA_ACT_DELETE is called in the reverse order of LA_ACT_ADD ++ at program termination (if the tests adds a dlclose or a library ++ with extra dependencies this will need to be adapted). */ ++ else if (this_act == LA_ACT_DELETE) ++ { ++ last_act_cookie = acts[--nacts]; ++ TEST_COMPARE (acts[nacts], cookie); ++ acts[nacts] = 0; ++ } ++ else if (this_act == LA_ACT_CONSISTENT) ++ { ++ TEST_COMPARE (cookie, last_act_cookie); ++ ++ /* LA_ACT_DELETE must always be followed by an la_objclose. */ ++ if (last_act == LA_ACT_DELETE) ++ TEST_COMPARE (seen_first_objclose, true); ++ else ++ TEST_COMPARE (last_act, LA_ACT_ADD); ++ } ++ ++ last_act = this_act; ++ seen_first_objclose = false; ++ } ++ else if (startswith (buffer, "la_objopen: ")) ++ { ++ char *lname; ++ uintptr_t laddr; ++ Lmid_t lmid; ++ uintptr_t cookie; ++ int r = sscanf (buffer, "la_objopen: %"SCNxPTR" %ms %"SCNxPTR" %ld", ++ &cookie, &lname, &laddr, &lmid); ++ TEST_COMPARE (r, 4); ++ ++ /* la_objclose is not triggered by vDSO because glibc does not ++ unload it. */ ++ if (is_vdso (lname)) ++ continue; ++ if (nobjs == max_objs) ++ FAIL_EXIT1 ("non expected la_objopen: %s %"PRIxPTR" %ld", ++ lname, laddr, lmid); ++ objs[nobjs].lname = lname; ++ objs[nobjs].laddr = laddr; ++ objs[nobjs].lmid = lmid; ++ objs[nobjs].closed = false; ++ nobjs++; ++ ++ /* This indirectly checks that la_objopen always comes before ++ la_objclose btween la_activity calls. */ ++ seen_first_objclose = false; ++ } ++ else if (startswith (buffer, "la_objclose: ")) ++ { ++ char *lname; ++ uintptr_t laddr; ++ Lmid_t lmid; ++ uintptr_t cookie; ++ int r = sscanf (buffer, "la_objclose: %"SCNxPTR" %ms %"SCNxPTR" %ld", ++ &cookie, &lname, &laddr, &lmid); ++ TEST_COMPARE (r, 4); ++ ++ for (size_t i = 0; i < nobjs; i++) ++ { ++ if (strcmp (lname, objs[i].lname) == 0 && lmid == objs[i].lmid) ++ { ++ TEST_COMPARE (objs[i].closed, false); ++ objs[i].closed = true; ++ break; ++ } ++ } ++ ++ /* la_objclose should be called after la_activity(LA_ACT_DELETE) for ++ the closed object's namespace. */ ++ TEST_COMPARE (last_act, LA_ACT_DELETE); ++ if (!seen_first_objclose) ++ { ++ TEST_COMPARE (last_act_cookie, cookie); ++ seen_first_objclose = true; ++ } ++ } ++ } ++ ++ for (size_t i = 0; i < nobjs; i++) ++ { ++ TEST_COMPARE (objs[i].closed, true); ++ free (objs[i].lname); ++ } ++ ++ /* la_activity(LA_ACT_CONSISTENT) should be the last callback received. ++ Since only one link map may be not-CONSISTENT at a time, this also ++ ensures la_activity(LA_ACT_CONSISTENT) is the last callback received ++ for every namespace. */ ++ TEST_COMPARE (last_act, LA_ACT_CONSISTENT); ++ ++ free (buffer); ++ xfclose (out); ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit23mod.c b/elf/tst-audit23mod.c +new file mode 100644 +index 0000000000000000..30315687037d25e8 +--- /dev/null ++++ b/elf/tst-audit23mod.c +@@ -0,0 +1,23 @@ ++/* Extra module for tst-audit23 ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int ++foo (void) ++{ ++ return 0; ++} +diff --git a/elf/tst-auditmod23.c b/elf/tst-auditmod23.c +new file mode 100644 +index 0000000000000000..d7c60d7a5cbc4f8a +--- /dev/null ++++ b/elf/tst-auditmod23.c +@@ -0,0 +1,74 @@ ++/* Audit module loaded by tst-audit23. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++struct map_desc_t ++{ ++ char *lname; ++ uintptr_t laddr; ++ Lmid_t lmid; ++}; ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ fprintf (stderr, "%s: %d %"PRIxPTR"\n", __func__, flag, (uintptr_t) cookie); ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *l_name = map->l_name[0] == '\0' ? "mainapp" : map->l_name; ++ fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__, ++ (uintptr_t) cookie, l_name, map->l_addr, lmid); ++ ++ struct map_desc_t *map_desc = malloc (sizeof (struct map_desc_t)); ++ if (map_desc == NULL) ++ abort (); ++ ++ map_desc->lname = strdup (l_name); ++ map_desc->laddr = map->l_addr; ++ map_desc->lmid = lmid; ++ ++ *cookie = (uintptr_t) map_desc; ++ ++ return 0; ++} ++ ++unsigned int ++la_objclose (uintptr_t *cookie) ++{ ++ struct map_desc_t *map_desc = (struct map_desc_t *) *cookie; ++ fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__, ++ (uintptr_t) cookie, map_desc->lname, map_desc->laddr, ++ map_desc->lmid); ++ ++ return 0; ++} diff --git a/glibc-upstream-2.34-155.patch b/glibc-upstream-2.34-155.patch new file mode 100644 index 0000000..bb24edf --- /dev/null +++ b/glibc-upstream-2.34-155.patch @@ -0,0 +1,299 @@ +commit efb21b5fb27fbad447d9f242436fb591870f0045 +Author: Adhemerval Zanella +Date: Mon Jan 24 10:46:16 2022 -0300 + + elf: Fix initial-exec TLS access on audit modules (BZ #28096) + + For audit modules and dependencies with initial-exec TLS, we can not + set the initial TLS image on default loader initialization because it + would already be set by the audit setup. However, subsequent thread + creation would need to follow the default behaviour. + + This patch fixes it by setting l_auditing link_map field not only + for the audit modules, but also for all its dependencies. This is + used on _dl_allocate_tls_init to avoid the static TLS initialization + at load time. + + Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 254d3d5aef2fd8430c469e1938209ac100ebf132) + +diff --git a/elf/Makefile b/elf/Makefile +index 407aaeaeb8c84020..3ccf78f62985e2d0 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -367,6 +367,7 @@ tests += \ + tst-audit18 \ + tst-audit19b \ + tst-audit20 \ ++ tst-audit21 \ + tst-audit22 \ + tst-audit23 \ + tst-auditmany \ +@@ -654,6 +655,8 @@ modules-names = \ + tst-auditmod19a \ + tst-auditmod19b \ + tst-auditmod20 \ ++ tst-auditmod21a \ ++ tst-auditmod21b \ + tst-auditmod22 \ + tst-auditmod23 \ + tst-auxvalmod \ +@@ -2045,6 +2048,11 @@ tst-audit19b-ARGS = -- $(host-test-program-cmd) + $(objpfx)tst-audit20.out: $(objpfx)tst-auditmod20.so + tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so + ++$(objpfx)tst-audit21: $(shared-thread-library) ++$(objpfx)tst-audit21.out: $(objpfx)tst-auditmod21a.so ++$(objpfx)tst-auditmod21a.so: $(objpfx)tst-auditmod21b.so ++tst-audit21-ENV = LD_AUDIT=$(objpfx)tst-auditmod21a.so ++ + $(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so + tst-audit22-ARGS = -- $(host-test-program-cmd) + +diff --git a/elf/dl-tls.c b/elf/dl-tls.c +index e2012d0cd515103b..fab6546e2d31edd4 100644 +--- a/elf/dl-tls.c ++++ b/elf/dl-tls.c +@@ -519,8 +519,12 @@ _dl_resize_dtv (dtv_t *dtv, size_t max_modid) + } + + ++/* Allocate initial TLS. RESULT should be a non-NULL pointer to storage ++ for the TLS space. The DTV may be resized, and so this function may ++ call malloc to allocate that space. The loader's GL(dl_load_tls_lock) ++ is taken when manipulating global TLS-related data in the loader. */ + void * +-_dl_allocate_tls_init (void *result) ++_dl_allocate_tls_init (void *result, bool init_tls) + { + if (result == NULL) + /* The memory allocation failed. */ +@@ -593,7 +597,14 @@ _dl_allocate_tls_init (void *result) + some platforms use in static programs requires it. */ + dtv[map->l_tls_modid].pointer.val = dest; + +- /* Copy the initialization image and clear the BSS part. */ ++ /* Copy the initialization image and clear the BSS part. For ++ audit modules or dependencies with initial-exec TLS, we can not ++ set the initial TLS image on default loader initialization ++ because it would already be set by the audit setup. However, ++ subsequent thread creation would need to follow the default ++ behaviour. */ ++ if (map->l_ns != LM_ID_BASE && !init_tls) ++ continue; + memset (__mempcpy (dest, map->l_tls_initimage, + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); +@@ -620,7 +631,7 @@ _dl_allocate_tls (void *mem) + { + return _dl_allocate_tls_init (mem == NULL + ? _dl_allocate_tls_storage () +- : allocate_dtv (mem)); ++ : allocate_dtv (mem), true); + } + rtld_hidden_def (_dl_allocate_tls) + +diff --git a/elf/rtld.c b/elf/rtld.c +index b089e5cf4740443e..26c6fb6479c9008c 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2429,7 +2429,7 @@ dl_main (const ElfW(Phdr) *phdr, + into the main thread's TLS area, which we allocated above. + Note: thread-local variables must only be accessed after completing + the next step. */ +- _dl_allocate_tls_init (tcbp); ++ _dl_allocate_tls_init (tcbp, false); + + /* And finally install it for the main thread. */ + if (! tls_init_tp_called) +diff --git a/elf/tst-audit21.c b/elf/tst-audit21.c +new file mode 100644 +index 0000000000000000..3a47ab64d44421ee +--- /dev/null ++++ b/elf/tst-audit21.c +@@ -0,0 +1,42 @@ ++/* Check LD_AUDIT with static TLS. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static volatile __thread int out __attribute__ ((tls_model ("initial-exec"))); ++ ++static void * ++tf (void *arg) ++{ ++ TEST_COMPARE (out, 0); ++ out = isspace (' '); ++ return NULL; ++} ++ ++int main (int argc, char *argv[]) ++{ ++ TEST_COMPARE (out, 0); ++ out = isspace (' '); ++ ++ pthread_t t = xpthread_create (NULL, tf, NULL); ++ xpthread_join (t); ++ ++ return 0; ++} +diff --git a/elf/tst-auditmod21a.c b/elf/tst-auditmod21a.c +new file mode 100644 +index 0000000000000000..f6d51b5c0531c49d +--- /dev/null ++++ b/elf/tst-auditmod21a.c +@@ -0,0 +1,80 @@ ++/* Check LD_AUDIT with static TLS. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#define tls_ie __attribute__ ((tls_model ("initial-exec"))) ++ ++__thread int tls_var0 tls_ie; ++__thread int tls_var1 tls_ie = 0x10; ++ ++/* Defined at tst-auditmod21b.so */ ++extern __thread int tls_var2; ++extern __thread int tls_var3; ++ ++static volatile int out; ++ ++static void ++call_libc (void) ++{ ++ /* isspace accesses the initial-exec glibc TLS variables, which are ++ setup in glibc initialization. */ ++ out = isspace (' '); ++} ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ tls_var0 = 0x1; ++ if (tls_var1 != 0x10) ++ abort (); ++ tls_var1 = 0x20; ++ ++ tls_var2 = 0x2; ++ if (tls_var3 != 0x20) ++ abort (); ++ tls_var3 = 0x40; ++ ++ call_libc (); ++ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map* map, Lmid_t lmid, uintptr_t* cookie) ++{ ++ call_libc (); ++ *cookie = (uintptr_t) map; ++ return 0; ++} ++ ++void ++la_activity (uintptr_t* cookie, unsigned int flag) ++{ ++ if (tls_var0 != 0x1 || tls_var1 != 0x20) ++ abort (); ++ call_libc (); ++} ++ ++void ++la_preinit (uintptr_t* cookie) ++{ ++ call_libc (); ++} +diff --git a/elf/tst-auditmod21b.c b/elf/tst-auditmod21b.c +new file mode 100644 +index 0000000000000000..6ba5335b7514c674 +--- /dev/null ++++ b/elf/tst-auditmod21b.c +@@ -0,0 +1,22 @@ ++/* Check LD_AUDIT with static TLS. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define tls_ie __attribute__ ((tls_model ("initial-exec"))) ++ ++__thread int tls_var2 tls_ie; ++__thread int tls_var3 tls_ie = 0x20; +diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c +index 50065bc9bd8a28e5..554a721f814b53c4 100644 +--- a/nptl/allocatestack.c ++++ b/nptl/allocatestack.c +@@ -139,7 +139,7 @@ get_cached_stack (size_t *sizep, void **memp) + memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t)); + + /* Re-initialize the TLS. */ +- _dl_allocate_tls_init (TLS_TPADJ (result)); ++ _dl_allocate_tls_init (TLS_TPADJ (result), true); + + return result; + } +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 686f0a7b9709eb10..a56060d0204cc453 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1254,7 +1254,7 @@ extern void _dl_allocate_static_tls (struct link_map *map) attribute_hidden; + /* These are internal entry points to the two halves of _dl_allocate_tls, + only used within rtld.c itself at startup time. */ + extern void *_dl_allocate_tls_storage (void) attribute_hidden; +-extern void *_dl_allocate_tls_init (void *); ++extern void *_dl_allocate_tls_init (void *, bool); + rtld_hidden_proto (_dl_allocate_tls_init) + + /* Deallocate memory allocated with _dl_allocate_tls. */ diff --git a/glibc-upstream-2.34-156.patch b/glibc-upstream-2.34-156.patch new file mode 100644 index 0000000..528b157 --- /dev/null +++ b/glibc-upstream-2.34-156.patch @@ -0,0 +1,1778 @@ +commit 056fc1c0e367b9682d89f60fd5c249f80074ff30 +Author: Adhemerval Zanella +Date: Mon Jan 24 10:46:17 2022 -0300 + + elf: Issue la_symbind for bind-now (BZ #23734) + + The audit symbind callback is not called for binaries built with + -Wl,-z,now or when LD_BIND_NOW=1 is used, nor the PLT tracking callbacks + (plt_enter and plt_exit) since this would change the expected + program semantics (where no PLT is expected) and would have performance + implications (such as for BZ#15533). + + LAV_CURRENT is also bumped to indicate the audit ABI change (where + la_symbind flags are set by the loader to indicate no possible PLT + trace). + + To handle powerpc64 ELFv1 function descriptor, _dl_audit_symbind + requires to know whether bind-now is used so the symbol value is + updated to function text segment instead of the OPD (for lazy binding + this is done by PPC64_LOAD_FUNCPTR on _dl_runtime_resolve). + + Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, + powerpc64-linux-gnu. + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 32612615c58b394c3eb09f020f31310797ad3854) + + Resolved conflicts: + NEWS - Manual merge. + +diff --git a/bits/link_lavcurrent.h b/bits/link_lavcurrent.h +index 44fbea1e8060997f..c48835d12b512355 100644 +--- a/bits/link_lavcurrent.h ++++ b/bits/link_lavcurrent.h +@@ -22,4 +22,4 @@ + #endif + + /* Version numbers for la_version handshake interface. */ +-#define LAV_CURRENT 1 ++#define LAV_CURRENT 2 +diff --git a/elf/Makefile b/elf/Makefile +index 3ccf78f62985e2d0..0ab3e885f5e35671 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -370,6 +370,12 @@ tests += \ + tst-audit21 \ + tst-audit22 \ + tst-audit23 \ ++ tst-audit24a \ ++ tst-audit24b \ ++ tst-audit24c \ ++ tst-audit24d \ ++ tst-audit25a \ ++ tst-audit25b \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -634,6 +640,18 @@ modules-names = \ + tst-audit18mod \ + tst-audit19bmod \ + tst-audit23mod \ ++ tst-audit24amod1 \ ++ tst-audit24amod2 \ ++ tst-audit24bmod1 \ ++ tst-audit24bmod2 \ ++ tst-audit24dmod1 \ ++ tst-audit24dmod2 \ ++ tst-audit24dmod3 \ ++ tst-audit24dmod4 \ ++ tst-audit25mod1 \ ++ tst-audit25mod2 \ ++ tst-audit25mod3 \ ++ tst-audit25mod4 \ + tst-auditlogmod-1 \ + tst-auditlogmod-2 \ + tst-auditlogmod-3 \ +@@ -659,6 +677,11 @@ modules-names = \ + tst-auditmod21b \ + tst-auditmod22 \ + tst-auditmod23 \ ++ tst-auditmod24a \ ++ tst-auditmod24b \ ++ tst-auditmod24c \ ++ tst-auditmod24d \ ++ tst-auditmod25 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -864,7 +887,8 @@ extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) + + # filtmod1.so, tst-big-note-lib.so, tst-ro-dynamic-mod.so have special + # rules. +-modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod ++modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod \ ++ tst-audit24bmod1 tst-audit24bmod2.so + + tests += $(tests-static) + +@@ -2060,6 +2084,69 @@ $(objpfx)tst-audit23.out: $(objpfx)tst-auditmod23.so \ + $(objpfx)tst-audit23mod.so + tst-audit23-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit24a.out: $(objpfx)tst-auditmod24a.so ++$(objpfx)tst-audit24a: $(objpfx)tst-audit24amod1.so \ ++ $(objpfx)tst-audit24amod2.so ++tst-audit24a-ENV = LD_AUDIT=$(objpfx)tst-auditmod24a.so ++LDFLAGS-tst-audit24a = -Wl,-z,now ++ ++$(objpfx)tst-audit24b.out: $(objpfx)tst-auditmod24b.so ++$(objpfx)tst-audit24b: $(objpfx)tst-audit24bmod1.so \ ++ $(objpfx)tst-audit24bmod2.so ++$(objpfx)tst-audit24bmod1: $(objpfx)tst-audit24bmod2.so ++# The test checks if a library without .gnu.version correctly calls the ++# audit callbacks. So it uses an explicit link rule to avoid linking ++# against libc.so. ++$(objpfx)tst-audit24bmod1.so: $(objpfx)tst-audit24bmod1.os ++ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod1.os \ ++ -Wl,-z,now ++ $(call after-link,$@.new) ++ mv -f $@.new $@ ++CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod1) ++$(objpfx)tst-audit24bmod2.so: $(objpfx)tst-audit24bmod2.os ++ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod2.os ++ $(call after-link,$@.new) ++ mv -f $@.new $@ ++CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod2) ++tst-audit24b-ENV = LD_AUDIT=$(objpfx)tst-auditmod24b.so ++LDFLAGS-tst-audit24b = -Wl,-z,now ++ ++# Same as tst-audit24a, but tests LD_BIND_NOW ++$(objpfx)tst-audit24c.out: $(objpfx)tst-auditmod24c.so ++$(objpfx)tst-audit24c: $(objpfx)tst-audit24amod1.so \ ++ $(objpfx)tst-audit24amod2.so ++tst-audit24c-ENV = LD_BIND_NOW=1 LD_AUDIT=$(objpfx)tst-auditmod24c.so ++LDFLAGS-tst-audit24b = -Wl,-z,lazy ++ ++$(objpfx)tst-audit24d.out: $(objpfx)tst-auditmod24d.so ++$(objpfx)tst-audit24d: $(objpfx)tst-audit24dmod1.so \ ++ $(objpfx)tst-audit24dmod2.so ++$(objpfx)tst-audit24dmod1.so: $(objpfx)tst-audit24dmod3.so ++LDFLAGS-tst-audit24dmod1.so = -Wl,-z,now ++$(objpfx)tst-audit24dmod2.so: $(objpfx)tst-audit24dmod4.so ++LDFLAGS-tst-audit24dmod2.so = -Wl,-z,lazy ++tst-audit24d-ENV = LD_AUDIT=$(objpfx)tst-auditmod24d.so ++LDFLAGS-tst-audit24d = -Wl,-z,lazy ++ ++$(objpfx)tst-audit25a.out: $(objpfx)tst-auditmod25.so ++$(objpfx)tst-audit25a: $(objpfx)tst-audit25mod1.so \ ++ $(objpfx)tst-audit25mod2.so \ ++ $(objpfx)tst-audit25mod3.so \ ++ $(objpfx)tst-audit25mod4.so ++$(objpfx)tst-audit25mod1.so: $(objpfx)tst-audit25mod3.so ++LDFLAGS-tst-audit25mod1.so = -Wl,-z,now ++$(objpfx)tst-audit25mod2.so: $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25mod2.so = -Wl,-z,lazy ++tst-audit25a-ARGS = -- $(host-test-program-cmd) ++ ++$(objpfx)tst-audit25b.out: $(objpfx)tst-auditmod25.so ++$(objpfx)tst-audit25b: $(objpfx)tst-audit25mod1.so \ ++ $(objpfx)tst-audit25mod2.so \ ++ $(objpfx)tst-audit25mod3.so \ ++ $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25b = -Wl,-z,now ++tst-audit25b-ARGS = -- $(host-test-program-cmd) ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 152712b12fed6de2..72a50717ef60a357 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -178,16 +178,23 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, + lookup_t result) + { +- reloc_result->bound = result; +- /* Compute index of the symbol entry in the symbol table of the DSO with the +- definition. */ +- reloc_result->boundndx = (defsym - (ElfW(Sym) *) D_PTR (result, +- l_info[DT_SYMTAB])); ++ bool for_jmp_slot = reloc_result == NULL; ++ ++ /* Compute index of the symbol entry in the symbol table of the DSO ++ with the definition. */ ++ unsigned int boundndx = defsym - (ElfW(Sym) *) D_PTR (result, ++ l_info[DT_SYMTAB]); ++ if (!for_jmp_slot) ++ { ++ reloc_result->bound = result; ++ reloc_result->boundndx = boundndx; ++ } + + if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0) + { + /* Set all bits since this symbol binding is not interesting. */ +- reloc_result->enterexit = (1u << DL_NNS) - 1; ++ if (!for_jmp_slot) ++ reloc_result->enterexit = (1u << DL_NNS) - 1; + return; + } + +@@ -199,12 +206,13 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + two bits. */ + assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8); + assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3); +- reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; ++ uint32_t enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT; + + const char *strtab2 = (const void *) D_PTR (result, l_info[DT_STRTAB]); + + unsigned int flags = 0; + struct audit_ifaces *afct = GLRO(dl_audit); ++ uintptr_t new_value = (uintptr_t) sym.st_value; + for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt) + { + /* XXX Check whether both DSOs must request action or only one */ +@@ -215,37 +223,41 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + { + if (afct->symbind != NULL) + { +- uintptr_t new_value = afct->symbind (&sym, +- reloc_result->boundndx, +- &l_state->cookie, +- &result_state->cookie, +- &flags, +- strtab2 + defsym->st_name); ++ flags |= for_jmp_slot ? LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT ++ : 0; ++ new_value = afct->symbind (&sym, boundndx, ++ &l_state->cookie, ++ &result_state->cookie, &flags, ++ strtab2 + defsym->st_name); + if (new_value != (uintptr_t) sym.st_value) + { + flags |= LA_SYMB_ALTVALUE; +- sym.st_value = new_value; ++ sym.st_value = for_jmp_slot ++ ? DL_FIXUP_BINDNOW_ADDR_VALUE (new_value) : new_value; + } + } + + /* Remember the results for every audit library and store a summary + in the first two bits. */ +- reloc_result->enterexit &= flags & (LA_SYMB_NOPLTENTER +- | LA_SYMB_NOPLTEXIT); +- reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER +- | LA_SYMB_NOPLTEXIT)) +- << ((cnt + 1) * 2)); ++ enterexit &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT); ++ enterexit |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) ++ << ((cnt + 1) * 2)); + } + else + /* If the bind flags say this auditor is not interested, set the bits + manually. */ +- reloc_result->enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) +- << ((cnt + 1) * 2)); ++ enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ++ << ((cnt + 1) * 2)); + afct = afct->next; + } + +- reloc_result->flags = flags; +- *value = DL_FIXUP_ADDR_VALUE (sym.st_value); ++ if (!for_jmp_slot) ++ { ++ reloc_result->enterexit = enterexit; ++ reloc_result->flags = flags; ++ } ++ ++ DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value); + } + + void +diff --git a/elf/do-rel.h b/elf/do-rel.h +index f441b749190c2641..4b7fc14f74cb5d09 100644 +--- a/elf/do-rel.h ++++ b/elf/do-rel.h +@@ -16,6 +16,8 @@ + License along with the GNU C Library; if not, see + . */ + ++#include ++ + /* This file may be included twice, to define both + `elf_dynamic_do_rel' and `elf_dynamic_do_rela'. */ + +@@ -123,6 +125,10 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[], + + for (; r < end; ++r) + { ++ ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; ++ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)]; ++ void *const r_addr_arg = (void *) (l_addr + r->r_offset); ++ const struct r_found_version *rversion = &map->l_versions[ndx]; + #if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP + if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) + { +@@ -133,10 +139,19 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[], + } + #endif + +- ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff; +- elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], +- &map->l_versions[ndx], +- (void *) (l_addr + r->r_offset), skip_ifunc); ++ elf_machine_rel (map, scope, r, sym, rversion, r_addr_arg, ++ skip_ifunc); ++#if defined SHARED && !defined RTLD_BOOTSTRAP ++ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT ++ && GLRO(dl_naudit) > 0) ++ { ++ struct link_map *sym_map ++ = RESOLVE_MAP (map, scope, &sym, rversion, ++ ELF_MACHINE_JMP_SLOT); ++ if (sym != NULL) ++ _dl_audit_symbind (map, NULL, sym, r_addr_arg, sym_map); ++ } ++#endif + } + + #if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP +@@ -158,17 +173,33 @@ elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[], + else + { + for (; r < end; ++r) ++ { ++ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)]; ++ void *const r_addr_arg = (void *) (l_addr + r->r_offset); + # ifdef ELF_MACHINE_IRELATIVE +- if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) +- { +- if (r2 == NULL) +- r2 = r; +- end2 = r; +- } +- else ++ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE) ++ { ++ if (r2 == NULL) ++ r2 = r; ++ end2 = r; ++ continue; ++ } + # endif +- elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL, +- (void *) (l_addr + r->r_offset), skip_ifunc); ++ elf_machine_rel (map, scope, r, sym, NULL, r_addr_arg, ++ skip_ifunc); ++# if defined SHARED && !defined RTLD_BOOTSTRAP ++ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT ++ && GLRO(dl_naudit) > 0) ++ { ++ struct link_map *sym_map ++ = RESOLVE_MAP (map, scope, &sym, ++ (struct r_found_version *) NULL, ++ ELF_MACHINE_JMP_SLOT); ++ if (sym != NULL) ++ _dl_audit_symbind (map, NULL , sym,r_addr_arg, sym_map); ++ } ++# endif ++ } + + # ifdef ELF_MACHINE_IRELATIVE + if (r2 != NULL) +diff --git a/elf/sotruss-lib.c b/elf/sotruss-lib.c +index b711f7b0c892a972..e4ebc8dbc697df3f 100644 +--- a/elf/sotruss-lib.c ++++ b/elf/sotruss-lib.c +@@ -17,6 +17,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + #include + #include +@@ -232,6 +233,12 @@ uintptr_t + la_symbind (Elf_Sym *sym, unsigned int ndx, uintptr_t *refcook, + uintptr_t *defcook, unsigned int *flags, const char *symname) + { ++ if (*flags & LA_SYMB_NOPLTENTER) ++ warnx ("cannot trace PLT enter (bind-now enabled)"); ++ ++ if (do_exit && *flags & LA_SYMB_NOPLTEXIT) ++ warnx ("cannot trace PLT exit (bind-now enabled)"); ++ + if (!do_exit) + *flags = LA_SYMB_NOPLTEXIT; + +diff --git a/elf/tst-audit24a.c b/elf/tst-audit24a.c +new file mode 100644 +index 0000000000000000..a1781c9b45f18fa0 +--- /dev/null ++++ b/elf/tst-audit24a.c +@@ -0,0 +1,36 @@ ++/* LD_AUDIT test for la_symbind and bind-now. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int tst_audit24amod1_func1 (void); ++int tst_audit24amod1_func2 (void); ++int tst_audit24amod2_func1 (void); ++ ++int ++do_test (void) ++{ ++ TEST_COMPARE (tst_audit24amod1_func1 (), 1); ++ TEST_COMPARE (tst_audit24amod1_func2 (), 2); ++ TEST_COMPARE (tst_audit24amod2_func1 (), 10); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit24amod1.c b/elf/tst-audit24amod1.c +new file mode 100644 +index 0000000000000000..0289a4abefbc7bbb +--- /dev/null ++++ b/elf/tst-audit24amod1.c +@@ -0,0 +1,31 @@ ++/* Module used by tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24amod1_func1 (void) ++{ ++ abort (); ++} ++ ++int ++tst_audit24amod1_func2 (void) ++{ ++ return 2; ++} +diff --git a/elf/tst-audit24amod2.c b/elf/tst-audit24amod2.c +new file mode 100644 +index 0000000000000000..1562afc9dfc1b9b3 +--- /dev/null ++++ b/elf/tst-audit24amod2.c +@@ -0,0 +1,25 @@ ++/* Module used by tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24amod2_func1 (void) ++{ ++ abort (); ++} +diff --git a/elf/tst-audit24b.c b/elf/tst-audit24b.c +new file mode 100644 +index 0000000000000000..567bee52c27f4361 +--- /dev/null ++++ b/elf/tst-audit24b.c +@@ -0,0 +1,37 @@ ++/* LD_AUDIT test for la_symbind and bind-now. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This is similar to tst-audit24a, with the difference this modules ++ does not have the .gnu.version section header. */ ++ ++#include ++#include ++ ++int tst_audit24bmod1_func1 (void); ++int tst_audit24bmod1_func2 (void); ++ ++int ++do_test (void) ++{ ++ TEST_COMPARE (tst_audit24bmod1_func1 (), 1); ++ TEST_COMPARE (tst_audit24bmod1_func2 (), 2); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit24bmod1.c b/elf/tst-audit24bmod1.c +new file mode 100644 +index 0000000000000000..57ce14a01bf72fb6 +--- /dev/null ++++ b/elf/tst-audit24bmod1.c +@@ -0,0 +1,31 @@ ++/* Module used by tst-audit24c. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int tst_audit24bmod2_func1 (void); ++ ++int ++tst_audit24bmod1_func1 (void) ++{ ++ return -1; ++} ++ ++int ++tst_audit24bmod1_func2 (void) ++{ ++ return tst_audit24bmod2_func1 (); ++} +diff --git a/elf/tst-audit24bmod2.c b/elf/tst-audit24bmod2.c +new file mode 100644 +index 0000000000000000..b298ce0a05bf2db2 +--- /dev/null ++++ b/elf/tst-audit24bmod2.c +@@ -0,0 +1,23 @@ ++/* Module used by tst-audit24b. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++int ++tst_audit24bmod2_func1 (void) ++{ ++ return -1; ++} +diff --git a/elf/tst-audit24c.c b/elf/tst-audit24c.c +new file mode 100644 +index 0000000000000000..46ed328756067276 +--- /dev/null ++++ b/elf/tst-audit24c.c +@@ -0,0 +1,2 @@ ++/* It tests LD_BIND_NOW=1 instead of linking with -Wl,-z,now */ ++#include "tst-audit24a.c" +diff --git a/elf/tst-audit24d.c b/elf/tst-audit24d.c +new file mode 100644 +index 0000000000000000..543f3b86a6bbdead +--- /dev/null ++++ b/elf/tst-audit24d.c +@@ -0,0 +1,36 @@ ++/* LD_AUDIT test for la_symbind and bind-now. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++int tst_audit24dmod1_func1 (void); ++int tst_audit24dmod1_func2 (void); ++int tst_audit24dmod2_func1 (void); ++ ++int ++do_test (void) ++{ ++ TEST_COMPARE (tst_audit24dmod1_func1 (), 1); ++ TEST_COMPARE (tst_audit24dmod1_func2 (), 32); ++ TEST_COMPARE (tst_audit24dmod2_func1 (), 10); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-audit24dmod1.c b/elf/tst-audit24dmod1.c +new file mode 100644 +index 0000000000000000..e563f69d638ac3f5 +--- /dev/null ++++ b/elf/tst-audit24dmod1.c +@@ -0,0 +1,33 @@ ++/* Module used by tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int tst_audit24dmod3_func1 (void); ++ ++_Noreturn int ++tst_audit24dmod1_func1 (void) ++{ ++ abort (); ++} ++ ++int ++tst_audit24dmod1_func2 (void) ++{ ++ return 2 + tst_audit24dmod3_func1 ();; ++} +diff --git a/elf/tst-audit24dmod2.c b/elf/tst-audit24dmod2.c +new file mode 100644 +index 0000000000000000..03fe9381281e5790 +--- /dev/null ++++ b/elf/tst-audit24dmod2.c +@@ -0,0 +1,28 @@ ++/* Module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int tst_audit24dmod4_func1 (void); ++ ++_Noreturn int ++tst_audit24dmod2_func1 (void) ++{ ++ tst_audit24dmod4_func1 (); ++ abort (); ++} +diff --git a/elf/tst-audit24dmod3.c b/elf/tst-audit24dmod3.c +new file mode 100644 +index 0000000000000000..106d517d2887d76c +--- /dev/null ++++ b/elf/tst-audit24dmod3.c +@@ -0,0 +1,31 @@ ++/* Module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24dmod3_func1 (void) ++{ ++ abort (); ++} ++ ++int ++tst_audit24dmod3_func2 (void) ++{ ++ return 4; ++} +diff --git a/elf/tst-audit24dmod4.c b/elf/tst-audit24dmod4.c +new file mode 100644 +index 0000000000000000..1da3b46917ba1083 +--- /dev/null ++++ b/elf/tst-audit24dmod4.c +@@ -0,0 +1,25 @@ ++/* Module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++_Noreturn int ++tst_audit24dmod4_func1 (void) ++{ ++ abort (); ++} +diff --git a/elf/tst-audit25a.c b/elf/tst-audit25a.c +new file mode 100644 +index 0000000000000000..49173e862516e876 +--- /dev/null ++++ b/elf/tst-audit25a.c +@@ -0,0 +1,129 @@ ++/* Check LD_AUDIT and LD_BIND_NOW. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++void tst_audit25mod1_func1 (void); ++void tst_audit25mod1_func2 (void); ++void tst_audit25mod2_func1 (void); ++void tst_audit25mod2_func2 (void); ++ ++static int ++handle_restart (void) ++{ ++ tst_audit25mod1_func1 (); ++ tst_audit25mod1_func2 (); ++ tst_audit25mod2_func1 (); ++ tst_audit25mod2_func2 (); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr < lenpre ? false : memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ setenv ("LD_AUDIT", "tst-auditmod25.so", 0); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ TEST_VERIFY_EXIT (i < array_length (spargv)); ++ ++ { ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* tst-audit25a is build with -Wl,-z,lazy and tst-audit25mod1 with ++ -Wl,-z,now; so only tst_audit25mod3_func1 should be expected to ++ have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 0\n" ++ "la_symbind: tst_audit25mod1_func2 0\n" ++ "la_symbind: tst_audit25mod2_func1 0\n" ++ "la_symbind: tst_audit25mod4_func1 0\n" ++ "la_symbind: tst_audit25mod2_func2 0\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ { ++ setenv ("LD_BIND_NOW", "1", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* With LD_BIND_NOW all symbols are expected to have ++ LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. Also the resolution ++ order is done in breadth-first order. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod4_func1 1\n" ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit25b.c b/elf/tst-audit25b.c +new file mode 100644 +index 0000000000000000..a56638d501f9bff5 +--- /dev/null ++++ b/elf/tst-audit25b.c +@@ -0,0 +1,128 @@ ++/* Check LD_AUDIT and LD_BIND_NOW. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int restart; ++#define CMDLINE_OPTIONS \ ++ { "restart", no_argument, &restart, 1 }, ++ ++void tst_audit25mod1_func1 (void); ++void tst_audit25mod1_func2 (void); ++void tst_audit25mod2_func1 (void); ++void tst_audit25mod2_func2 (void); ++ ++static int ++handle_restart (void) ++{ ++ tst_audit25mod1_func1 (); ++ tst_audit25mod1_func2 (); ++ tst_audit25mod2_func1 (); ++ tst_audit25mod2_func2 (); ++ ++ return 0; ++} ++ ++static inline bool ++startswith (const char *str, const char *pre) ++{ ++ size_t lenpre = strlen (pre); ++ size_t lenstr = strlen (str); ++ return lenstr >= lenpre && memcmp (pre, str, lenpre) == 0; ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ /* We must have either: ++ - One or four parameters left if called initially: ++ + path to ld.so optional ++ + "--library-path" optional ++ + the library path optional ++ + the application name */ ++ ++ if (restart) ++ return handle_restart (); ++ ++ setenv ("LD_AUDIT", "tst-auditmod25.so", 0); ++ ++ char *spargv[9]; ++ int i = 0; ++ for (; i < argc - 1; i++) ++ spargv[i] = argv[i + 1]; ++ spargv[i++] = (char *) "--direct"; ++ spargv[i++] = (char *) "--restart"; ++ spargv[i] = NULL; ++ ++ { ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* tst-audit25a and tst-audit25mod1 are built with -Wl,-z,now, but ++ tst-audit25mod2 is built with -Wl,-z,lazy. So only ++ tst_audit25mod4_func1 (called by tst_audit25mod2_func1) should not ++ have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n" ++ "la_symbind: tst_audit25mod4_func1 0\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ { ++ setenv ("LD_BIND_NOW", "1", 0); ++ struct support_capture_subprocess result ++ = support_capture_subprogram (spargv[0], spargv); ++ support_capture_subprocess_check (&result, "tst-audit25a", 0, ++ sc_allow_stderr); ++ ++ /* With LD_BIND_NOW all symbols are expected to have ++ LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT. Also the resolution ++ order is done in breadth-first order. */ ++ TEST_COMPARE_STRING (result.err.buffer, ++ "la_symbind: tst_audit25mod4_func1 1\n" ++ "la_symbind: tst_audit25mod3_func1 1\n" ++ "la_symbind: tst_audit25mod1_func1 1\n" ++ "la_symbind: tst_audit25mod2_func1 1\n" ++ "la_symbind: tst_audit25mod1_func2 1\n" ++ "la_symbind: tst_audit25mod2_func2 1\n"); ++ ++ support_capture_subprocess_free (&result); ++ } ++ ++ return 0; ++} ++ ++#define TEST_FUNCTION_ARGV do_test ++#include +diff --git a/elf/tst-audit25mod1.c b/elf/tst-audit25mod1.c +new file mode 100644 +index 0000000000000000..a132e34a9b2cf51f +--- /dev/null ++++ b/elf/tst-audit25mod1.c +@@ -0,0 +1,30 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void tst_audit25mod3_func1 (void); ++ ++void ++tst_audit25mod1_func1 (void) ++{ ++ tst_audit25mod3_func1 (); ++} ++ ++void ++tst_audit25mod1_func2 (void) ++{ ++} +diff --git a/elf/tst-audit25mod2.c b/elf/tst-audit25mod2.c +new file mode 100644 +index 0000000000000000..92da26fa80b202c2 +--- /dev/null ++++ b/elf/tst-audit25mod2.c +@@ -0,0 +1,30 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void tst_audit25mod4_func1 (void); ++ ++void ++tst_audit25mod2_func1 (void) ++{ ++ tst_audit25mod4_func1 (); ++} ++ ++void ++tst_audit25mod2_func2 (void) ++{ ++} +diff --git a/elf/tst-audit25mod3.c b/elf/tst-audit25mod3.c +new file mode 100644 +index 0000000000000000..af83e8919083adef +--- /dev/null ++++ b/elf/tst-audit25mod3.c +@@ -0,0 +1,22 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void ++tst_audit25mod3_func1 (void) ++{ ++} +diff --git a/elf/tst-audit25mod4.c b/elf/tst-audit25mod4.c +new file mode 100644 +index 0000000000000000..6cdf34357582da16 +--- /dev/null ++++ b/elf/tst-audit25mod4.c +@@ -0,0 +1,22 @@ ++/* Module used by tst-audit25. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++void ++tst_audit25mod4_func1 (void) ++{ ++} +diff --git a/elf/tst-auditmod24.h b/elf/tst-auditmod24.h +new file mode 100644 +index 0000000000000000..5fdbfef12dac2b2a +--- /dev/null ++++ b/elf/tst-auditmod24.h +@@ -0,0 +1,29 @@ ++/* Auxiliary functions for tst-audit24x. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _TST_AUDITMOD24_H ++#define _TST_AUDITMOD24_H ++ ++static void ++test_symbind_flags (unsigned int flags) ++{ ++ if ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) == 0) ++ abort (); ++} ++ ++#endif +diff --git a/elf/tst-auditmod24a.c b/elf/tst-auditmod24a.c +new file mode 100644 +index 0000000000000000..d8e88f3984af1707 +--- /dev/null ++++ b/elf/tst-auditmod24a.c +@@ -0,0 +1,114 @@ ++/* Audit modules for tst-audit24a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT24_COOKIE 0x1 ++#define AUDIT24MOD1_COOKIE 0x2 ++#define AUDIT24MOD2_COOKIE 0x3 ++ ++#ifndef TEST_NAME ++# define TEST_NAME "tst-audit24a" ++#endif ++#ifndef TEST_MOD ++# define TEST_MOD TEST_NAME ++#endif ++#ifndef TEST_FUNC ++# define TEST_FUNC "tst_audit24a" ++#endif ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? TEST_NAME : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, TEST_MOD "mod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, TEST_MOD "mod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, TEST_NAME) == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_func2 (void) ++{ ++ return 10; ++} ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ /* Check if bind-now symbols are advertised to not call the PLT ++ hooks. */ ++ test_symbind_flags (*flags); ++ ++ if (strcmp (symname, TEST_FUNC "mod1_func1") == 0) ++ return (uintptr_t) tst_func1; ++ else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, TEST_FUNC "mod2_func1") == 0)) ++ { ++ test_symbind_flags (*flags); ++ ++ return (uintptr_t) tst_func2; ++ } ++ ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod24b.c b/elf/tst-auditmod24b.c +new file mode 100644 +index 0000000000000000..e98f6d5ec528fe03 +--- /dev/null ++++ b/elf/tst-auditmod24b.c +@@ -0,0 +1,104 @@ ++/* Audit modules for tst-audit24b. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define TEST_NAME "tst-audit24b" ++#define TEST_FUNC "tst_audit24b" ++ ++#define AUDIT24_COOKIE 0x1 ++#define AUDIT24MOD1_COOKIE 0x2 ++#define AUDIT24MOD2_COOKIE 0x3 ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? TEST_NAME : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, TEST_NAME "mod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, TEST_NAME "mod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, TEST_NAME) == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_func2 (void) ++{ ++ return 2; ++} ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ if (strcmp (symname, TEST_FUNC "mod1_func1") == 0) ++ return (uintptr_t) tst_func1; ++ else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ else if (*refcook == AUDIT24MOD1_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, TEST_FUNC "mod2_func1") == 0)) ++ { ++ test_symbind_flags (*flags); ++ return (uintptr_t) tst_func2; ++ } ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod24c.c b/elf/tst-auditmod24c.c +new file mode 100644 +index 0000000000000000..67e62c9d332f48a7 +--- /dev/null ++++ b/elf/tst-auditmod24c.c +@@ -0,0 +1,3 @@ ++#define TEST_NAME "tst-audit24c" ++#define TEST_MOD "tst-audit24a" ++#include "tst-auditmod24a.c" +diff --git a/elf/tst-auditmod24d.c b/elf/tst-auditmod24d.c +new file mode 100644 +index 0000000000000000..8c803ecc0a48f21b +--- /dev/null ++++ b/elf/tst-auditmod24d.c +@@ -0,0 +1,120 @@ ++/* Audit module for tst-audit24d. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT24_COOKIE 0x0 ++#define AUDIT24MOD1_COOKIE 0x1 ++#define AUDIT24MOD2_COOKIE 0x2 ++#define AUDIT24MOD3_COOKIE 0x3 ++#define AUDIT24MOD4_COOKIE 0x4 ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? "tst-audit24d" : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, "tst-audit24dmod1.so") == 0) ++ ck = AUDIT24MOD1_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod2.so") == 0) ++ ck = AUDIT24MOD2_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod3.so") == 0) ++ ck = AUDIT24MOD3_COOKIE; ++ else if (strcmp (l_name, "tst-audit24dmod.so") == 0) ++ ck = AUDIT24MOD4_COOKIE; ++ else if (strcmp (l_name, "tst-audit24d") == 0) ++ ck = AUDIT24_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++static int ++tst_audit24dmod1_func1 (void) ++{ ++ return 1; ++} ++ ++static int ++tst_audit24dmod2_func1 (void) ++{ ++ return 10; ++} ++ ++static int ++tst_audit24dmod3_func1 (void) ++{ ++ return 30; ++} ++ ++#include ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook == AUDIT24_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD1_COOKIE) ++ { ++ if (strcmp (symname, "tst_audit24dmod1_func1") == 0) ++ return (uintptr_t) tst_audit24dmod1_func1; ++ else if (strcmp (symname, "tst_audit24dmod1_func2") == 0) ++ return sym->st_value; ++ abort (); ++ } ++ if (*defcook == AUDIT24MOD2_COOKIE ++ && (strcmp (symname, "tst_audit24dmod2_func1") == 0)) ++ return (uintptr_t) tst_audit24dmod2_func1; ++ ++ /* malloc functions. */ ++ return sym->st_value; ++ } ++ else if (*refcook == AUDIT24MOD1_COOKIE) ++ { ++ if (*defcook == AUDIT24MOD3_COOKIE ++ && strcmp (symname, "tst_audit24dmod3_func1") == 0) ++ { ++ test_symbind_flags (*flags); ++ ++ return (uintptr_t) tst_audit24dmod3_func1; ++ } ++ } ++ ++ abort (); ++} +diff --git a/elf/tst-auditmod25.c b/elf/tst-auditmod25.c +new file mode 100644 +index 0000000000000000..526f5c54bc2c3b8c +--- /dev/null ++++ b/elf/tst-auditmod25.c +@@ -0,0 +1,79 @@ ++/* Audit modules for tst-audit25a. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define AUDIT25_COOKIE 0x1 ++#define AUDIT25MOD1_COOKIE 0x2 ++#define AUDIT25MOD2_COOKIE 0x3 ++#define AUDIT25MOD3_COOKIE 0x2 ++#define AUDIT25MOD4_COOKIE 0x3 ++ ++#define TEST_NAME "tst-audit25" ++#define TEST_MOD "tst-audit25" ++#define TEST_FUNC "tst_audit25" ++ ++unsigned int ++la_version (unsigned int version) ++{ ++ return LAV_CURRENT; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? TEST_NAME : p + 1; ++ ++ uintptr_t ck = -1; ++ if (strcmp (l_name, TEST_MOD "mod1.so") == 0) ++ ck = AUDIT25MOD1_COOKIE; ++ else if (strcmp (l_name, TEST_MOD "mod2.so") == 0) ++ ck = AUDIT25MOD2_COOKIE; ++ else if (strcmp (l_name, TEST_MOD "mod3.so") == 0) ++ ck = AUDIT25MOD3_COOKIE; ++ else if (strcmp (l_name, TEST_MOD "mod4.so") == 0) ++ ck = AUDIT25MOD4_COOKIE; ++ else if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) ++ ck = AUDIT25_COOKIE; ++ ++ *cookie = ck; ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++#if __ELF_NATIVE_CLASS == 64 ++uintptr_t ++la_symbind64 (Elf64_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#else ++uintptr_t ++la_symbind32 (Elf32_Sym *sym, unsigned int ndx, ++ uintptr_t *refcook, uintptr_t *defcook, ++ unsigned int *flags, const char *symname) ++#endif ++{ ++ if (*refcook != -1 && *defcook != -1) ++ fprintf (stderr, "la_symbind: %s %u\n", symname, ++ *flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ? 1 : 0); ++ return sym->st_value; ++} +diff --git a/sysdeps/generic/dl-lookupcfg.h b/sysdeps/generic/dl-lookupcfg.h +index c038c31ce6550059..a15fd32771d42f2a 100644 +--- a/sysdeps/generic/dl-lookupcfg.h ++++ b/sysdeps/generic/dl-lookupcfg.h +@@ -26,3 +26,6 @@ + #define DL_FIXUP_VALUE_CODE_ADDR(value) (value) + #define DL_FIXUP_VALUE_ADDR(value) (value) + #define DL_FIXUP_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = st_value; +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index a56060d0204cc453..a38de94bf7ea8e93 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1403,7 +1403,10 @@ void _dl_audit_objclose (struct link_map *l) + /* Call the la_preinit from the audit modules for the link_map L. */ + void _dl_audit_preinit (struct link_map *l); + +-/* Call the la_symbind{32,64} from the audit modules for the link_map L. */ ++/* Call the la_symbind{32,64} from the audit modules for the link_map L. If ++ RELOC_RESULT is NULL it assumes the symbol to be bind-now and will set ++ the flags with LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT prior calling ++ la_symbind{32,64}. */ + void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value, + lookup_t result) +diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h +index 2f6991aa16e87a00..f4f00714fa158e18 100644 +--- a/sysdeps/hppa/dl-lookupcfg.h ++++ b/sysdeps/hppa/dl-lookupcfg.h +@@ -81,3 +81,6 @@ void attribute_hidden _dl_unmap (struct link_map *map); + #define DL_FIXUP_VALUE_CODE_ADDR(value) ((value).ip) + #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value)) + #define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr)) ++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = *(struct fdesc *) (st_value) +diff --git a/sysdeps/ia64/dl-lookupcfg.h b/sysdeps/ia64/dl-lookupcfg.h +index 58ca32424b08aaf4..2b8b2fa5db9d7093 100644 +--- a/sysdeps/ia64/dl-lookupcfg.h ++++ b/sysdeps/ia64/dl-lookupcfg.h +@@ -74,3 +74,6 @@ extern void attribute_hidden _dl_unmap (struct link_map *map); + + #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value)) + #define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr)) ++#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = *(struct fdesc *) (st_value) +diff --git a/sysdeps/powerpc/dl-lookupcfg.h b/sysdeps/powerpc/dl-lookupcfg.h +new file mode 100644 +index 0000000000000000..25abcc1d12b15bfc +--- /dev/null ++++ b/sysdeps/powerpc/dl-lookupcfg.h +@@ -0,0 +1,39 @@ ++/* Configuration of lookup functions. PowerPC version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define DL_FIXUP_VALUE_TYPE ElfW(Addr) ++#define DL_FIXUP_MAKE_VALUE(map, addr) (addr) ++#define DL_FIXUP_VALUE_CODE_ADDR(value) (value) ++#define DL_FIXUP_VALUE_ADDR(value) (value) ++#define DL_FIXUP_ADDR_VALUE(addr) (addr) ++#if __WORDSIZE == 64 && _CALL_ELF == 1 ++/* We need to correctly set the audit modules value for bind-now. */ ++# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) \ ++ (((Elf64_FuncDesc *)(addr))->fd_func) ++# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ ({ \ ++ Elf64_FuncDesc *opd = (Elf64_FuncDesc *) (value); \ ++ opd->fd_func = (st_value); \ ++ if ((new_value) != (uintptr_t) (st_value)) \ ++ opd->fd_toc = ((Elf64_FuncDesc *)(new_value))->fd_toc; \ ++ }) ++#else ++# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) ++# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ (*value) = st_value; ++#endif diff --git a/glibc-upstream-2.34-157.patch b/glibc-upstream-2.34-157.patch new file mode 100644 index 0000000..0f5fd3c --- /dev/null +++ b/glibc-upstream-2.34-157.patch @@ -0,0 +1,1042 @@ +commit b118bce87a7c581cb2d6d7698eb582e45aed3481 +Author: Ben Woodard +Date: Mon Jan 24 10:46:18 2022 -0300 + + elf: Fix runtime linker auditing on aarch64 (BZ #26643) + + The rtld audit support show two problems on aarch64: + + 1. _dl_runtime_resolve does not preserve x8, the indirect result + location register, which might generate wrong result calls + depending of the function signature. + + 2. The NEON Q registers pushed onto the stack by _dl_runtime_resolve + were twice the size of D registers extracted from the stack frame by + _dl_runtime_profile. + + While 2. might result in wrong information passed on the PLT tracing, + 1. generates wrong runtime behaviour. + + The aarch64 rtld audit support is changed to: + + * Both La_aarch64_regs and La_aarch64_retval are expanded to include + both x8 and the full sized NEON V registers, as defined by the + ABI. + + * dl_runtime_profile needed to extract registers saved by + _dl_runtime_resolve and put them into the new correctly sized + La_aarch64_regs structure. + + * The LAV_CURRENT check is change to only accept new audit modules + to avoid the undefined behavior of not save/restore x8. + + * Different than other architectures, audit modules older than + LAV_CURRENT are rejected (both La_aarch64_regs and La_aarch64_retval + changed their layout and there are no requirements to support multiple + audit interface with the inherent aarch64 issues). + + * A new field is also reserved on both La_aarch64_regs and + La_aarch64_retval to support variant pcs symbols. + + Similar to x86, a new La_aarch64_vector type to represent the NEON + register is added on the La_aarch64_regs (so each type can be accessed + directly). + + Since LAV_CURRENT was already bumped to support bind-now, there is + no need to increase it again. + + Checked on aarch64-linux-gnu. + + Co-authored-by: Adhemerval Zanella + Reviewed-by: Szabolcs Nagy + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit ce9a68c57c260c8417afc93972849ac9ad243ec4) + + Resolved conflicts: + NEWS + elf/rtld.c + +diff --git a/elf/rtld.c b/elf/rtld.c +index 26c6fb6479c9008c..434fbeddd5cce74d 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + +@@ -991,7 +992,7 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n", + return; + } + +- if (lav > LAV_CURRENT) ++ if (!_dl_audit_check_version (lav)) + { + _dl_debug_printf ("\ + ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n", +diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile +index 7c66fb97aa065f99..7183895d04ea8d42 100644 +--- a/sysdeps/aarch64/Makefile ++++ b/sysdeps/aarch64/Makefile +@@ -10,6 +10,26 @@ endif + + ifeq ($(subdir),elf) + sysdep-dl-routines += dl-bti ++ ++tests += tst-audit26 \ ++ tst-audit27 ++ ++modules-names += \ ++ tst-audit26mod \ ++ tst-auditmod26 \ ++ tst-audit27mod \ ++ tst-auditmod27 ++ ++$(objpfx)tst-audit26: $(objpfx)tst-audit26mod.so \ ++ $(objpfx)tst-auditmod26.so ++LDFLAGS-tst-audit26 += -Wl,-z,lazy ++tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so ++ ++$(objpfx)tst-audit27: $(objpfx)tst-audit27mod.so \ ++ $(objpfx)tst-auditmod27.so ++$(objpfx)tst-audit27mod.so: $(libsupport) ++LDFLAGS-tst-audit27 += -Wl,-z,lazy ++tst-audit27-ENV = LD_AUDIT=$(objpfx)tst-auditmod27.so + endif + + ifeq ($(subdir),elf) +diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h +index 774bbe5f4544f559..c64726947c9addea 100644 +--- a/sysdeps/aarch64/bits/link.h ++++ b/sysdeps/aarch64/bits/link.h +@@ -20,23 +20,31 @@ + # error "Never include directly; use instead." + #endif + ++typedef union ++{ ++ float s; ++ double d; ++ long double q; ++} La_aarch64_vector; ++ + /* Registers for entry into PLT on AArch64. */ + typedef struct La_aarch64_regs + { +- uint64_t lr_xreg[8]; +- uint64_t lr_dreg[8]; +- uint64_t lr_sp; +- uint64_t lr_lr; ++ uint64_t lr_xreg[9]; ++ La_aarch64_vector lr_vreg[8]; ++ uint64_t lr_sp; ++ uint64_t lr_lr; ++ void *lr_vpcs; + } La_aarch64_regs; + + /* Return values for calls from PLT on AArch64. */ + typedef struct La_aarch64_retval + { +- /* Up to two integer registers can be used for a return value. */ +- uint64_t lrv_xreg[2]; +- /* Up to four D registers can be used for a return value. */ +- uint64_t lrv_dreg[4]; +- ++ /* Up to eight integer registers can be used for a return value. */ ++ uint64_t lrv_xreg[8]; ++ /* Up to eight V registers can be used for a return value. */ ++ La_aarch64_vector lrv_vreg[8]; ++ void *lrv_vpcs; + } La_aarch64_retval; + __BEGIN_DECLS + +diff --git a/sysdeps/aarch64/dl-audit-check.h b/sysdeps/aarch64/dl-audit-check.h +new file mode 100644 +index 0000000000000000..e324339a1d4abec3 +--- /dev/null ++++ b/sysdeps/aarch64/dl-audit-check.h +@@ -0,0 +1,28 @@ ++/* rtld-audit version check. AArch64 version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ /* Audit version 1 do not save x8 or NEON registers, which required ++ changing La_aarch64_regs and La_aarch64_retval layout (BZ#26643). The ++ missing indirect result save/restore makes _dl_runtime_profile ++ potentially trigger undefined behavior if the function returns a large ++ struct (even when PLT trace is not requested). */ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym +index d67d28b40ce7d4ff..cb4dcdcbed0db492 100644 +--- a/sysdeps/aarch64/dl-link.sym ++++ b/sysdeps/aarch64/dl-link.sym +@@ -7,9 +7,11 @@ DL_SIZEOF_RG sizeof(struct La_aarch64_regs) + DL_SIZEOF_RV sizeof(struct La_aarch64_retval) + + DL_OFFSET_RG_X0 offsetof(struct La_aarch64_regs, lr_xreg) +-DL_OFFSET_RG_D0 offsetof(struct La_aarch64_regs, lr_dreg) ++DL_OFFSET_RG_V0 offsetof(struct La_aarch64_regs, lr_vreg) + DL_OFFSET_RG_SP offsetof(struct La_aarch64_regs, lr_sp) + DL_OFFSET_RG_LR offsetof(struct La_aarch64_regs, lr_lr) ++DL_OFFSET_RG_VPCS offsetof(struct La_aarch64_regs, lr_vpcs) + + DL_OFFSET_RV_X0 offsetof(struct La_aarch64_retval, lrv_xreg) +-DL_OFFSET_RV_D0 offsetof(struct La_aarch64_retval, lrv_dreg) ++DL_OFFSET_RV_V0 offsetof(struct La_aarch64_retval, lrv_vreg) ++DL_OFFSET_RV_VPCS offsetof(struct La_aarch64_retval, lrv_vpcs) +diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S +index 9b352b1d0f7d62e7..457570e7df5148c0 100644 +--- a/sysdeps/aarch64/dl-trampoline.S ++++ b/sysdeps/aarch64/dl-trampoline.S +@@ -45,7 +45,8 @@ _dl_runtime_resolve: + + cfi_rel_offset (lr, 8) + +- /* Save arguments. */ ++ /* Note: Saving x9 is not required by the ABI but the assembler requires ++ the immediate values of operand 3 to be a multiple of 16 */ + stp x8, x9, [sp, #-(80+8*16)]! + cfi_adjust_cfa_offset (80+8*16) + cfi_rel_offset (x8, 0) +@@ -142,7 +143,7 @@ _dl_runtime_profile: + Stack frame layout: + [sp, #...] lr + [sp, #...] &PLTGOT[n] +- [sp, #96] La_aarch64_regs ++ [sp, #256] La_aarch64_regs + [sp, #48] La_aarch64_retval + [sp, #40] frame size return from pltenter + [sp, #32] dl_profile_call saved x1 +@@ -183,19 +184,25 @@ _dl_runtime_profile: + stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] + cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0) + cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8) +- +- stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0) +- cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8) +- stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1] +- cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0) +- cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8) +- stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0) +- cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8) +- stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] +- cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0) +- cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8) ++ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0] ++ cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0) ++ /* Note 8 bytes of padding is in the stack frame for alignment */ ++ ++ stp q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0) ++ cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16) ++ stp q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1] ++ cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0) ++ cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16) ++ stp q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0) ++ cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16) ++ stp q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] ++ cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0) ++ cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16) ++ ++ /* No APCS extension supported. */ ++ str xzr, [X29, #OFFSET_RG + DL_OFFSET_RG_VPCS] + + add x0, x29, #SF_SIZE + 16 + ldr x1, [x29, #OFFSET_LR] +@@ -234,10 +241,11 @@ _dl_runtime_profile: + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] +- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] +- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] ++ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] + + cfi_def_cfa_register (sp) + ldp x29, x30, [x29, #0] +@@ -280,14 +288,22 @@ _dl_runtime_profile: + ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1] + ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2] + ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3] +- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1] +- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2] +- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3] ++ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3] + blr ip0 +- stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] +- stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +- stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] ++ stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0] ++ stp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1] ++ stp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2] ++ stp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3] ++ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4] ++ stp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0] ++ stp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1] ++ stp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2] ++ stp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3] ++ str xzr, [X29, #OFFSET_RV + DL_OFFSET_RG_VPCS] + + /* Setup call to pltexit */ + ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0] +@@ -295,9 +311,16 @@ _dl_runtime_profile: + add x3, x29, #OFFSET_RV + bl _dl_audit_pltexit + +- ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0] +- ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0] +- ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1] ++ ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0] ++ ldp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1] ++ ldp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2] ++ ldp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3] ++ ldr x8, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*4] ++ ldp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0] ++ ldp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1] ++ ldp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2] ++ ldp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3] ++ + /* LR from within La_aarch64_reg */ + ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR] + cfi_restore(lr) +diff --git a/sysdeps/aarch64/tst-audit26.c b/sysdeps/aarch64/tst-audit26.c +new file mode 100644 +index 0000000000000000..46de8acd219cb8bc +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26.c +@@ -0,0 +1,37 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit26mod.h" ++ ++int ++do_test (void) ++{ ++ /* Returning a large struct uses 'x8' as indirect result location. */ ++ struct large_struct r = tst_audit26_func (ARG1, ARG2, ARG3); ++ ++ struct large_struct e = set_large_struct (ARG1, ARG2, ARG3); ++ ++ TEST_COMPARE_BLOB (r.a, sizeof (r.a), e.a, sizeof (e.a)); ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/aarch64/tst-audit26mod.c b/sysdeps/aarch64/tst-audit26mod.c +new file mode 100644 +index 0000000000000000..67d5ffce7288b34c +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26mod.c +@@ -0,0 +1,33 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include "tst-audit26mod.h" ++ ++struct large_struct ++tst_audit26_func (char a, short b, long int c) ++{ ++ if (a != ARG1) ++ abort (); ++ if (b != ARG2) ++ abort (); ++ if (c != ARG3) ++ abort (); ++ ++ return set_large_struct (a, b, c); ++} +diff --git a/sysdeps/aarch64/tst-audit26mod.h b/sysdeps/aarch64/tst-audit26mod.h +new file mode 100644 +index 0000000000000000..f80409f96bae6c82 +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit26mod.h +@@ -0,0 +1,50 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _TST_AUDIT27MOD_H ++#define _TST_AUDIT27MOD_H 1 ++ ++#include ++ ++struct large_struct ++{ ++ char a[16]; ++ short b[8]; ++ long int c[4]; ++}; ++ ++static inline struct large_struct ++set_large_struct (char a, short b, long int c) ++{ ++ struct large_struct r; ++ for (int i = 0; i < array_length (r.a); i++) ++ r.a[i] = a; ++ for (int i = 0; i < array_length (r.b); i++) ++ r.b[i] = b; ++ for (int i = 0; i < array_length (r.c); i++) ++ r.c[i] = c; ++ return r; ++} ++ ++#define ARG1 0x12 ++#define ARG2 0x1234 ++#define ARG3 0x12345678 ++ ++struct large_struct tst_audit26_func (char a, short b, long int c); ++ ++#endif +diff --git a/sysdeps/aarch64/tst-audit27.c b/sysdeps/aarch64/tst-audit27.c +new file mode 100644 +index 0000000000000000..5ebc09771f845af0 +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27.c +@@ -0,0 +1,64 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++int ++do_test (void) ++{ ++ { ++ float r = tst_audit27_func_float (FUNC_FLOAT_ARG0, FUNC_FLOAT_ARG1, ++ FUNC_FLOAT_ARG2, FUNC_FLOAT_ARG3, ++ FUNC_FLOAT_ARG4, FUNC_FLOAT_ARG5, ++ FUNC_FLOAT_ARG6, FUNC_FLOAT_ARG7); ++ if (r != FUNC_FLOAT_RET) ++ FAIL_EXIT1 ("tst_audit27_func_float() returned %a, expected %a", ++ r, FUNC_FLOAT_RET); ++ } ++ ++ { ++ double r = tst_audit27_func_double (FUNC_DOUBLE_ARG0, FUNC_DOUBLE_ARG1, ++ FUNC_DOUBLE_ARG2, FUNC_DOUBLE_ARG3, ++ FUNC_DOUBLE_ARG4, FUNC_DOUBLE_ARG5, ++ FUNC_DOUBLE_ARG6, FUNC_DOUBLE_ARG7); ++ if (r != FUNC_DOUBLE_RET) ++ FAIL_EXIT1 ("tst_audit27_func_double() returned %la, expected %la", ++ r, FUNC_DOUBLE_RET); ++ } ++ ++ { ++ long double r = tst_audit27_func_ldouble (FUNC_LDOUBLE_ARG0, ++ FUNC_LDOUBLE_ARG1, ++ FUNC_LDOUBLE_ARG2, ++ FUNC_LDOUBLE_ARG3, ++ FUNC_LDOUBLE_ARG4, ++ FUNC_LDOUBLE_ARG5, ++ FUNC_LDOUBLE_ARG6, ++ FUNC_LDOUBLE_ARG7); ++ if (r != FUNC_LDOUBLE_RET) ++ FAIL_EXIT1 ("tst_audit27_func_ldouble() returned %La, expected %La", ++ r, FUNC_LDOUBLE_RET); ++ } ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/aarch64/tst-audit27mod.c b/sysdeps/aarch64/tst-audit27mod.c +new file mode 100644 +index 0000000000000000..922b518f0af4b97b +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27mod.c +@@ -0,0 +1,95 @@ ++/* Check LD_AUDIT for aarch64 ABI specifics. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++float ++tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4, ++ float a5, float a6, float a7) ++{ ++ if (a0 != FUNC_FLOAT_ARG0) ++ FAIL_EXIT1 ("a0: %a != %a", a0, FUNC_FLOAT_ARG0); ++ if (a1 != FUNC_FLOAT_ARG1) ++ FAIL_EXIT1 ("a1: %a != %a", a1, FUNC_FLOAT_ARG1); ++ if (a2 != FUNC_FLOAT_ARG2) ++ FAIL_EXIT1 ("a2: %a != %a", a2, FUNC_FLOAT_ARG2); ++ if (a3 != FUNC_FLOAT_ARG3) ++ FAIL_EXIT1 ("a3: %a != %a", a3, FUNC_FLOAT_ARG3); ++ if (a4 != FUNC_FLOAT_ARG4) ++ FAIL_EXIT1 ("a4: %a != %a", a4, FUNC_FLOAT_ARG4); ++ if (a5 != FUNC_FLOAT_ARG5) ++ FAIL_EXIT1 ("a5: %a != %a", a5, FUNC_FLOAT_ARG5); ++ if (a6 != FUNC_FLOAT_ARG6) ++ FAIL_EXIT1 ("a6: %a != %a", a6, FUNC_FLOAT_ARG6); ++ if (a7 != FUNC_FLOAT_ARG7) ++ FAIL_EXIT1 ("a7: %a != %a", a7, FUNC_FLOAT_ARG7); ++ ++ return FUNC_FLOAT_RET; ++} ++ ++double ++tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4, ++ double a5, double a6, double a7) ++{ ++ if (a0 != FUNC_DOUBLE_ARG0) ++ FAIL_EXIT1 ("a0: %la != %la", a0, FUNC_DOUBLE_ARG0); ++ if (a1 != FUNC_DOUBLE_ARG1) ++ FAIL_EXIT1 ("a1: %la != %la", a1, FUNC_DOUBLE_ARG1); ++ if (a2 != FUNC_DOUBLE_ARG2) ++ FAIL_EXIT1 ("a2: %la != %la", a2, FUNC_DOUBLE_ARG2); ++ if (a3 != FUNC_DOUBLE_ARG3) ++ FAIL_EXIT1 ("a3: %la != %la", a3, FUNC_DOUBLE_ARG3); ++ if (a4 != FUNC_DOUBLE_ARG4) ++ FAIL_EXIT1 ("a4: %la != %la", a4, FUNC_DOUBLE_ARG4); ++ if (a5 != FUNC_DOUBLE_ARG5) ++ FAIL_EXIT1 ("a5: %la != %la", a5, FUNC_DOUBLE_ARG5); ++ if (a6 != FUNC_DOUBLE_ARG6) ++ FAIL_EXIT1 ("a6: %la != %la", a6, FUNC_DOUBLE_ARG6); ++ if (a7 != FUNC_DOUBLE_ARG7) ++ FAIL_EXIT1 ("a7: %la != %la", a7, FUNC_DOUBLE_ARG7); ++ ++ return FUNC_DOUBLE_RET; ++} ++ ++long double ++tst_audit27_func_ldouble (long double a0, long double a1, long double a2, ++ long double a3, long double a4, long double a5, ++ long double a6, long double a7) ++{ ++ if (a0 != FUNC_LDOUBLE_ARG0) ++ FAIL_EXIT1 ("a0: %La != %La", a0, FUNC_LDOUBLE_ARG0); ++ if (a1 != FUNC_LDOUBLE_ARG1) ++ FAIL_EXIT1 ("a1: %La != %La", a1, FUNC_LDOUBLE_ARG1); ++ if (a2 != FUNC_LDOUBLE_ARG2) ++ FAIL_EXIT1 ("a2: %La != %La", a2, FUNC_LDOUBLE_ARG2); ++ if (a3 != FUNC_LDOUBLE_ARG3) ++ FAIL_EXIT1 ("a3: %La != %La", a3, FUNC_LDOUBLE_ARG3); ++ if (a4 != FUNC_LDOUBLE_ARG4) ++ FAIL_EXIT1 ("a4: %La != %La", a4, FUNC_LDOUBLE_ARG4); ++ if (a5 != FUNC_LDOUBLE_ARG5) ++ FAIL_EXIT1 ("a5: %La != %La", a5, FUNC_LDOUBLE_ARG5); ++ if (a6 != FUNC_LDOUBLE_ARG6) ++ FAIL_EXIT1 ("a6: %La != %La", a6, FUNC_LDOUBLE_ARG6); ++ if (a7 != FUNC_LDOUBLE_ARG7) ++ FAIL_EXIT1 ("a7: %La != %La", a7, FUNC_LDOUBLE_ARG7); ++ ++ return FUNC_LDOUBLE_RET; ++} +diff --git a/sysdeps/aarch64/tst-audit27mod.h b/sysdeps/aarch64/tst-audit27mod.h +new file mode 100644 +index 0000000000000000..1709d222ca251e3b +--- /dev/null ++++ b/sysdeps/aarch64/tst-audit27mod.h +@@ -0,0 +1,67 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _TST_AUDIT27MOD_H ++#define _TST_AUDIT27MOD_H 1 ++ ++#include ++ ++#define FUNC_FLOAT_ARG0 FLT_MIN ++#define FUNC_FLOAT_ARG1 FLT_MAX ++#define FUNC_FLOAT_ARG2 FLT_EPSILON ++#define FUNC_FLOAT_ARG3 FLT_TRUE_MIN ++#define FUNC_FLOAT_ARG4 0.0f ++#define FUNC_FLOAT_ARG5 1.0f ++#define FUNC_FLOAT_ARG6 2.0f ++#define FUNC_FLOAT_ARG7 3.0f ++#define FUNC_FLOAT_RET 4.0f ++ ++float ++tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4, ++ float a5, float a6, float a7); ++ ++#define FUNC_DOUBLE_ARG0 DBL_MIN ++#define FUNC_DOUBLE_ARG1 DBL_MAX ++#define FUNC_DOUBLE_ARG2 DBL_EPSILON ++#define FUNC_DOUBLE_ARG3 DBL_TRUE_MIN ++#define FUNC_DOUBLE_ARG4 0.0 ++#define FUNC_DOUBLE_ARG5 1.0 ++#define FUNC_DOUBLE_ARG6 2.0 ++#define FUNC_DOUBLE_ARG7 3.0 ++#define FUNC_DOUBLE_RET 0x1.fffffe0000001p+127 ++ ++double ++tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4, ++ double a5, double a6, double a7); ++ ++#define FUNC_LDOUBLE_ARG0 DBL_MAX + 1.0L ++#define FUNC_LDOUBLE_ARG1 DBL_MAX + 2.0L ++#define FUNC_LDOUBLE_ARG2 DBL_MAX + 3.0L ++#define FUNC_LDOUBLE_ARG3 DBL_MAX + 4.0L ++#define FUNC_LDOUBLE_ARG4 DBL_MAX + 5.0L ++#define FUNC_LDOUBLE_ARG5 DBL_MAX + 6.0L ++#define FUNC_LDOUBLE_ARG6 DBL_MAX + 7.0L ++#define FUNC_LDOUBLE_ARG7 DBL_MAX + 8.0L ++#define FUNC_LDOUBLE_RET 0x1.fffffffffffff000000000000001p+1023L ++ ++long double ++tst_audit27_func_ldouble (long double a0, long double a1, long double a2, ++ long double a3, long double a4, long double a5, ++ long double a6, long double a7); ++ ++#endif +diff --git a/sysdeps/aarch64/tst-auditmod26.c b/sysdeps/aarch64/tst-auditmod26.c +new file mode 100644 +index 0000000000000000..b03b6baed9aeb528 +--- /dev/null ++++ b/sysdeps/aarch64/tst-auditmod26.c +@@ -0,0 +1,103 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "tst-audit26mod.h" ++ ++#define TEST_NAME "tst-audit26" ++ ++#define AUDIT26_COOKIE 0 ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return v; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? map->l_name : p + 1; ++ uintptr_t ck = -1; ++ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) ++ ck = AUDIT26_COOKIE; ++ *cookie = ck; ++ printf ("objopen: %ld, %s [cookie=%ld]\n", lmid, l_name, ck); ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++ElfW(Addr) ++la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)), ++ unsigned int ndx __attribute__ ((unused)), ++ uintptr_t *refcook, uintptr_t *defcook, ++ La_aarch64_regs *regs, unsigned int *flags, ++ const char *symname, long int *framesizep) ++{ ++ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", ++ symname, (long int) sym->st_value, ndx, *flags); ++ ++ if (strcmp (symname, "tst_audit26_func") == 0) ++ { ++ assert (regs->lr_xreg[0] == ARG1); ++ assert (regs->lr_xreg[1] == ARG2); ++ assert (regs->lr_xreg[2] == ARG3); ++ } ++ else ++ abort (); ++ ++ assert (regs->lr_vpcs == 0); ++ ++ /* Clobber 'x8'. */ ++ asm volatile ("mov x8, -1" : : : "x8"); ++ ++ *framesizep = 1024; ++ ++ return sym->st_value; ++} ++ ++unsigned int ++la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, ++ const struct La_aarch64_regs *inregs, ++ struct La_aarch64_retval *outregs, const char *symname) ++{ ++ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n", ++ symname, (long int) sym->st_value, ndx); ++ ++ if (strcmp (symname, "tst_audit26_func") == 0) ++ { ++ assert (inregs->lr_xreg[0] == ARG1); ++ assert (inregs->lr_xreg[1] == ARG2); ++ assert (inregs->lr_xreg[2] == ARG3); ++ } ++ else ++ abort (); ++ ++ assert (inregs->lr_vpcs == 0); ++ assert (outregs->lrv_vpcs == 0); ++ ++ /* Clobber 'x8'. */ ++ asm volatile ("mov x8, -1" : : : "x8"); ++ ++ return 0; ++} +diff --git a/sysdeps/aarch64/tst-auditmod27.c b/sysdeps/aarch64/tst-auditmod27.c +new file mode 100644 +index 0000000000000000..21132c2985dab7b2 +--- /dev/null ++++ b/sysdeps/aarch64/tst-auditmod27.c +@@ -0,0 +1,180 @@ ++/* Check LD_AUDIT for aarch64 specific ABI. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "tst-audit27mod.h" ++ ++#define TEST_NAME "tst-audit27" ++ ++#define AUDIT27_COOKIE 0 ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return v; ++} ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ const char *p = strrchr (map->l_name, '/'); ++ const char *l_name = p == NULL ? map->l_name : p + 1; ++ uintptr_t ck = -1; ++ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) ++ ck = AUDIT27_COOKIE; ++ *cookie = ck; ++ printf ("objopen: %ld, %s [%ld]\n", lmid, l_name, ck); ++ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO; ++} ++ ++ElfW(Addr) ++la_aarch64_gnu_pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, La_aarch64_regs *regs, ++ unsigned int *flags, const char *symname, ++ long int *framesizep) ++{ ++ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n", ++ symname, (long int) sym->st_value, ndx, *flags); ++ ++ if (strcmp (symname, "tst_audit27_func_float") == 0) ++ { ++ assert (regs->lr_vreg[0].s == FUNC_FLOAT_ARG0); ++ assert (regs->lr_vreg[1].s == FUNC_FLOAT_ARG1); ++ assert (regs->lr_vreg[2].s == FUNC_FLOAT_ARG2); ++ assert (regs->lr_vreg[3].s == FUNC_FLOAT_ARG3); ++ assert (regs->lr_vreg[4].s == FUNC_FLOAT_ARG4); ++ assert (regs->lr_vreg[5].s == FUNC_FLOAT_ARG5); ++ assert (regs->lr_vreg[6].s == FUNC_FLOAT_ARG6); ++ assert (regs->lr_vreg[7].s == FUNC_FLOAT_ARG7); ++ } ++ else if (strcmp (symname, "tst_audit27_func_double") == 0) ++ { ++ assert (regs->lr_vreg[0].d == FUNC_DOUBLE_ARG0); ++ assert (regs->lr_vreg[1].d == FUNC_DOUBLE_ARG1); ++ assert (regs->lr_vreg[2].d == FUNC_DOUBLE_ARG2); ++ assert (regs->lr_vreg[3].d == FUNC_DOUBLE_ARG3); ++ assert (regs->lr_vreg[4].d == FUNC_DOUBLE_ARG4); ++ assert (regs->lr_vreg[5].d == FUNC_DOUBLE_ARG5); ++ assert (regs->lr_vreg[6].d == FUNC_DOUBLE_ARG6); ++ assert (regs->lr_vreg[7].d == FUNC_DOUBLE_ARG7); ++ } ++ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0) ++ { ++ assert (regs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0); ++ assert (regs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1); ++ assert (regs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2); ++ assert (regs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3); ++ assert (regs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4); ++ assert (regs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5); ++ assert (regs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6); ++ assert (regs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7); ++ } ++ else ++ abort (); ++ ++ assert (regs->lr_vpcs == 0); ++ ++ /* Clobber the q registers on exit. */ ++ uint8_t v = 0xff; ++ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0"); ++ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1"); ++ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2"); ++ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3"); ++ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4"); ++ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5"); ++ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6"); ++ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7"); ++ ++ *framesizep = 1024; ++ ++ return sym->st_value; ++} ++ ++unsigned int ++la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook, ++ uintptr_t *defcook, ++ const struct La_aarch64_regs *inregs, ++ struct La_aarch64_retval *outregs, ++ const char *symname) ++{ ++ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n", ++ symname, (long int) sym->st_value, ndx); ++ ++ if (strcmp (symname, "tst_audit27_func_float") == 0) ++ { ++ assert (inregs->lr_vreg[0].s == FUNC_FLOAT_ARG0); ++ assert (inregs->lr_vreg[1].s == FUNC_FLOAT_ARG1); ++ assert (inregs->lr_vreg[2].s == FUNC_FLOAT_ARG2); ++ assert (inregs->lr_vreg[3].s == FUNC_FLOAT_ARG3); ++ assert (inregs->lr_vreg[4].s == FUNC_FLOAT_ARG4); ++ assert (inregs->lr_vreg[5].s == FUNC_FLOAT_ARG5); ++ assert (inregs->lr_vreg[6].s == FUNC_FLOAT_ARG6); ++ assert (inregs->lr_vreg[7].s == FUNC_FLOAT_ARG7); ++ ++ assert (outregs->lrv_vreg[0].s == FUNC_FLOAT_RET); ++ } ++ else if (strcmp (symname, "tst_audit27_func_double") == 0) ++ { ++ assert (inregs->lr_vreg[0].d == FUNC_DOUBLE_ARG0); ++ assert (inregs->lr_vreg[1].d == FUNC_DOUBLE_ARG1); ++ assert (inregs->lr_vreg[2].d == FUNC_DOUBLE_ARG2); ++ assert (inregs->lr_vreg[3].d == FUNC_DOUBLE_ARG3); ++ assert (inregs->lr_vreg[4].d == FUNC_DOUBLE_ARG4); ++ assert (inregs->lr_vreg[5].d == FUNC_DOUBLE_ARG5); ++ assert (inregs->lr_vreg[6].d == FUNC_DOUBLE_ARG6); ++ assert (inregs->lr_vreg[7].d == FUNC_DOUBLE_ARG7); ++ ++ assert (outregs->lrv_vreg[0].d == FUNC_DOUBLE_RET); ++ } ++ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0) ++ { ++ assert (inregs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0); ++ assert (inregs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1); ++ assert (inregs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2); ++ assert (inregs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3); ++ assert (inregs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4); ++ assert (inregs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5); ++ assert (inregs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6); ++ assert (inregs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7); ++ ++ assert (outregs->lrv_vreg[0].q == FUNC_LDOUBLE_RET); ++ } ++ else ++ abort (); ++ ++ assert (inregs->lr_vpcs == 0); ++ assert (outregs->lrv_vpcs == 0); ++ ++ /* Clobber the q registers on exit. */ ++ uint8_t v = 0xff; ++ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0"); ++ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1"); ++ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2"); ++ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3"); ++ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4"); ++ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5"); ++ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6"); ++ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7"); ++ ++ return 0; ++} +diff --git a/sysdeps/generic/dl-audit-check.h b/sysdeps/generic/dl-audit-check.h +new file mode 100644 +index 0000000000000000..3ab76532868b5895 +--- /dev/null ++++ b/sysdeps/generic/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. Generic version. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav <= LAV_CURRENT; ++} diff --git a/glibc-upstream-2.34-158.patch b/glibc-upstream-2.34-158.patch new file mode 100644 index 0000000..1cb44e2 --- /dev/null +++ b/glibc-upstream-2.34-158.patch @@ -0,0 +1,23 @@ +commit 165e7ad459fbba2f89708fba04a55bb3981e884c +Author: Szabolcs Nagy +Date: Wed Feb 2 14:03:58 2022 +0000 + + Fix elf/tst-audit25a with default bind now toolchains + + This test relies on lazy binding for the executable so request that + explicitly in case the toolchain defaults to bind now. + + (cherry picked from commit 80a08d0faa9b224019f895800c4d97de4e23e1aa) + +diff --git a/elf/Makefile b/elf/Makefile +index 0ab3e885f5e35671..9e4e056938a75ddb 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -2133,6 +2133,7 @@ $(objpfx)tst-audit25a: $(objpfx)tst-audit25mod1.so \ + $(objpfx)tst-audit25mod2.so \ + $(objpfx)tst-audit25mod3.so \ + $(objpfx)tst-audit25mod4.so ++LDFLAGS-tst-audit25a = -Wl,-z,lazy + $(objpfx)tst-audit25mod1.so: $(objpfx)tst-audit25mod3.so + LDFLAGS-tst-audit25mod1.so = -Wl,-z,now + $(objpfx)tst-audit25mod2.so: $(objpfx)tst-audit25mod4.so diff --git a/glibc-upstream-2.34-159.patch b/glibc-upstream-2.34-159.patch new file mode 100644 index 0000000..e45d205 --- /dev/null +++ b/glibc-upstream-2.34-159.patch @@ -0,0 +1,27 @@ +commit aabdad371f44defc6046aabdc96af7782a2e94be +Author: H.J. Lu +Date: Sun Feb 6 11:12:24 2022 -0800 + + elf: Replace tst-audit24bmod2.so with tst-audit24bmod2 + + Replace tst-audit24bmod2.so with tst-audit24bmod2 to silence: + + make[2]: Entering directory '/export/gnu/import/git/gitlab/x86-glibc/elf' + Makefile:2201: warning: overriding recipe for target '/export/build/gnu/tools-build/glibc-gitlab/build-x86_64-linux/elf/tst-audit24bmod2.so' + ../Makerules:765: warning: ignoring old recipe for target '/export/build/gnu/tools-build/glibc-gitlab/build-x86_64-linux/elf/tst-audit24bmod2.so' + + (cherry picked from commit fa7ad1df1915c8a62f50e3a5b7e10f9c7118cd7f) + +diff --git a/elf/Makefile b/elf/Makefile +index 9e4e056938a75ddb..57059293d0bc86cb 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -888,7 +888,7 @@ extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) + # filtmod1.so, tst-big-note-lib.so, tst-ro-dynamic-mod.so have special + # rules. + modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod \ +- tst-audit24bmod1 tst-audit24bmod2.so ++ tst-audit24bmod1 tst-audit24bmod2 + + tests += $(tests-static) + diff --git a/glibc-upstream-2.34-160.patch b/glibc-upstream-2.34-160.patch new file mode 100644 index 0000000..2ce930a --- /dev/null +++ b/glibc-upstream-2.34-160.patch @@ -0,0 +1,114 @@ +commit 4dca2d3a7b43bf99bd6a567870a3144af4e763ef +Author: Adhemerval Zanella +Date: Fri Feb 4 15:54:59 2022 -0300 + + hppa: Fix bind-now audit (BZ #28857) + + On hppa, a function pointer returned by la_symbind is actually a function + descriptor has the plabel bit set (bit 30). This must be cleared to get + the actual address of the descriptor. If the descriptor has been bound, + the first word of the descriptor is the physical address of theA function, + otherwise, the first word of the descriptor points to a trampoline in the + PLT. + + This patch also adds a workaround on tests because on hppa (and it seems + to be the only ABI I have see it), some shared library adds a dynamic PLT + relocation to am empty symbol name: + + $ readelf -r elf/tst-audit25mod1.so + [...] + Relocation section '.rela.plt' at offset 0x464 contains 6 entries: + Offset Info Type Sym.Value Sym. Name + Addend + 00002008 00000081 R_PARISC_IPLT 508 + [...] + + It breaks some assumptions on the test, where a symbol with an empty + name ("") is passed on la_symbind. + + Checked on x86_64-linux-gnu and hppa-linux-gnu. + + (cherry picked from commit 9e94f57484a2aba0fe67ea2059b5843f651887c2) + +diff --git a/elf/Makefile b/elf/Makefile +index 57059293d0bc86cb..3e17a0706f5ec2df 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -2116,7 +2116,7 @@ $(objpfx)tst-audit24c.out: $(objpfx)tst-auditmod24c.so + $(objpfx)tst-audit24c: $(objpfx)tst-audit24amod1.so \ + $(objpfx)tst-audit24amod2.so + tst-audit24c-ENV = LD_BIND_NOW=1 LD_AUDIT=$(objpfx)tst-auditmod24c.so +-LDFLAGS-tst-audit24b = -Wl,-z,lazy ++LDFLAGS-tst-audit24c = -Wl,-z,lazy + + $(objpfx)tst-audit24d.out: $(objpfx)tst-auditmod24d.so + $(objpfx)tst-audit24d: $(objpfx)tst-audit24dmod1.so \ +diff --git a/elf/dl-audit.c b/elf/dl-audit.c +index 72a50717ef60a357..ec9b032eae37c103 100644 +--- a/elf/dl-audit.c ++++ b/elf/dl-audit.c +@@ -257,7 +257,8 @@ _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result, + reloc_result->flags = flags; + } + +- DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value); ++ if (flags & LA_SYMB_ALTVALUE) ++ DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value); + } + + void +diff --git a/elf/tst-auditmod24a.c b/elf/tst-auditmod24a.c +index d8e88f3984af1707..3075dfae2fd3d288 100644 +--- a/elf/tst-auditmod24a.c ++++ b/elf/tst-auditmod24a.c +@@ -110,5 +110,7 @@ la_symbind32 (Elf32_Sym *sym, unsigned int ndx, + return sym->st_value; + } + +- abort (); ++ if (symname[0] != '\0') ++ abort (); ++ return sym->st_value; + } +diff --git a/elf/tst-auditmod24d.c b/elf/tst-auditmod24d.c +index 8c803ecc0a48f21b..badc6be451ee0357 100644 +--- a/elf/tst-auditmod24d.c ++++ b/elf/tst-auditmod24d.c +@@ -116,5 +116,7 @@ la_symbind32 (Elf32_Sym *sym, unsigned int ndx, + } + } + +- abort (); ++ if (symname[0] != '\0') ++ abort (); ++ return sym->st_value; + } +diff --git a/elf/tst-auditmod25.c b/elf/tst-auditmod25.c +index 526f5c54bc2c3b8c..20640a8daf346b5f 100644 +--- a/elf/tst-auditmod25.c ++++ b/elf/tst-auditmod25.c +@@ -72,7 +72,7 @@ la_symbind32 (Elf32_Sym *sym, unsigned int ndx, + unsigned int *flags, const char *symname) + #endif + { +- if (*refcook != -1 && *defcook != -1) ++ if (*refcook != -1 && *defcook != -1 && symname[0] != '\0') + fprintf (stderr, "la_symbind: %s %u\n", symname, + *flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) ? 1 : 0); + return sym->st_value; +diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h +index f4f00714fa158e18..92fd0b7c844713ce 100644 +--- a/sysdeps/hppa/dl-lookupcfg.h ++++ b/sysdeps/hppa/dl-lookupcfg.h +@@ -80,7 +80,9 @@ void attribute_hidden _dl_unmap (struct link_map *map); + /* Extract the code address from a fixup value */ + #define DL_FIXUP_VALUE_CODE_ADDR(value) ((value).ip) + #define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value)) +-#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr)) ++/* Clear the plabel bit to get the actual address of the descriptor. */ ++#define DL_FIXUP_ADDR_VALUE(addr) \ ++ (*(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (addr) & ~2)) + #define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr) +-#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ +- (*value) = *(struct fdesc *) (st_value) ++#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \ ++ *(value) = *(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (new_value) & ~2) diff --git a/glibc-upstream-2.34-162.patch b/glibc-upstream-2.34-162.patch new file mode 100644 index 0000000..d409366 --- /dev/null +++ b/glibc-upstream-2.34-162.patch @@ -0,0 +1,242 @@ +commit 0c03cb54c808173d8e7ba96f6152dfcf627ac496 +Author: Stefan Liebler +Date: Wed Apr 13 14:36:09 2022 +0200 + + S390: Add new s390 platform z16. + + The new IBM z16 is added to platform string array. + The macro _DL_PLATFORMS_COUNT is incremented. + + _dl_hwcaps_subdir is extended by "z16" if HWCAP_S390_VXRS_PDE2 + is set. HWCAP_S390_NNPA is not tested in _dl_hwcaps_subdirs_active + as those instructions may be replaced or removed in future. + + tst-glibc-hwcaps.c is extended in order to test z16 via new marker5. + + A fatal glibc error is dumped if glibc was build with architecture + level set for z16, but run on an older machine. (See dl-hwcap-check.h) + + (cherry picked from commit 2376944b9e5c0364b9fb473e4d8dabca31b57167) + +Conflicts: + sysdeps/s390/s390-64/dl-hwcap-check.h - Use GCCMACRO__ARCH__. + - Backported f01d482f0355a7029d0715ace0ccf3323e7e94bc requires it. + +diff --git a/elf/Makefile b/elf/Makefile +index 3e17a0706f5ec2df..8e2dd91c583f9a62 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -571,6 +571,11 @@ modules-names = \ + libmarkermod4-2 \ + libmarkermod4-3 \ + libmarkermod4-4 \ ++ libmarkermod5-1 \ ++ libmarkermod5-2 \ ++ libmarkermod5-3 \ ++ libmarkermod5-4 \ ++ libmarkermod5-5 \ + ltglobmod1 \ + ltglobmod2 \ + neededobj1 \ +@@ -2412,6 +2417,7 @@ LDFLAGS-libmarkermod1-1.so += -Wl,-soname,libmarkermod1.so + LDFLAGS-libmarkermod2-1.so += -Wl,-soname,libmarkermod2.so + LDFLAGS-libmarkermod3-1.so += -Wl,-soname,libmarkermod3.so + LDFLAGS-libmarkermod4-1.so += -Wl,-soname,libmarkermod4.so ++LDFLAGS-libmarkermod5-1.so += -Wl,-soname,libmarkermod5.so + $(objpfx)libmarkermod%.os : markermodMARKER-VALUE.c + $(compile-command.c) \ + -DMARKER=marker$(firstword $(subst -, ,$*)) \ +@@ -2424,6 +2430,8 @@ $(objpfx)libmarkermod3.so: $(objpfx)libmarkermod3-1.so + cp $< $@ + $(objpfx)libmarkermod4.so: $(objpfx)libmarkermod4-1.so + cp $< $@ ++$(objpfx)libmarkermod5.so: $(objpfx)libmarkermod5-1.so ++ cp $< $@ + + # tst-glibc-hwcaps-prepend checks that --glibc-hwcaps-prepend is + # preferred over auto-detected subdirectories. +diff --git a/elf/tst-glibc-hwcaps-cache.script b/elf/tst-glibc-hwcaps-cache.script +index c3271f61f9e50f2e..d58fc8c5de3c5198 100644 +--- a/elf/tst-glibc-hwcaps-cache.script ++++ b/elf/tst-glibc-hwcaps-cache.script +@@ -4,6 +4,7 @@ + cp $B/elf/libmarkermod2-1.so $L/libmarkermod2.so + cp $B/elf/libmarkermod3-1.so $L/libmarkermod3.so + cp $B/elf/libmarkermod4-1.so $L/libmarkermod4.so ++cp $B/elf/libmarkermod5-1.so $L/libmarkermod5.so + + mkdirp 0770 $L/glibc-hwcaps/power9 + cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/power9/libmarkermod2.so +@@ -20,6 +21,11 @@ mkdirp 0770 $L/glibc-hwcaps/z15 + cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z13/libmarkermod4.so + cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z14/libmarkermod4.so + cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so ++mkdirp 0770 $L/glibc-hwcaps/z16 ++cp $B/elf/libmarkermod5-2.so $L/glibc-hwcaps/z13/libmarkermod5.so ++cp $B/elf/libmarkermod5-3.so $L/glibc-hwcaps/z14/libmarkermod5.so ++cp $B/elf/libmarkermod5-4.so $L/glibc-hwcaps/z15/libmarkermod5.so ++cp $B/elf/libmarkermod5-5.so $L/glibc-hwcaps/z16/libmarkermod5.so + + mkdirp 0770 $L/glibc-hwcaps/x86-64-v2 + cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so +diff --git a/sysdeps/s390/dl-procinfo.c b/sysdeps/s390/dl-procinfo.c +index 155f0bd99eccb3f9..755b54ff13a0fa2f 100644 +--- a/sysdeps/s390/dl-procinfo.c ++++ b/sysdeps/s390/dl-procinfo.c +@@ -64,11 +64,12 @@ PROCINFO_CLASS const char _dl_s390_cap_flags[23][9] + #if !defined PROCINFO_DECL && defined SHARED + ._dl_s390_platforms + #else +-PROCINFO_CLASS const char _dl_s390_platforms[10][7] ++PROCINFO_CLASS const char _dl_s390_platforms[11][7] + #endif + #ifndef PROCINFO_DECL + = { +- "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15" ++ "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15", ++ "z16" + } + #endif + #if !defined SHARED || defined PROCINFO_DECL +diff --git a/sysdeps/s390/dl-procinfo.h b/sysdeps/s390/dl-procinfo.h +index e4e3e334a5b3d47c..d44e1dd97441bd90 100644 +--- a/sysdeps/s390/dl-procinfo.h ++++ b/sysdeps/s390/dl-procinfo.h +@@ -23,7 +23,7 @@ + + #define _DL_HWCAP_COUNT 23 + +-#define _DL_PLATFORMS_COUNT 10 ++#define _DL_PLATFORMS_COUNT 11 + + /* The kernel provides up to 32 capability bits with elf_hwcap. */ + #define _DL_FIRST_PLATFORM 32 +diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile +index e5da26871c862e63..66ed844e68df5159 100644 +--- a/sysdeps/s390/s390-64/Makefile ++++ b/sysdeps/s390/s390-64/Makefile +@@ -7,8 +7,11 @@ CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused + CFLAGS-dl-load.c += -Wno-unused + CFLAGS-dl-reloc.c += -Wno-unused + +-$(objpfx)tst-glibc-hwcaps: $(objpfx)libmarkermod2-1.so \ +- $(objpfx)libmarkermod3-1.so $(objpfx)libmarkermod4-1.so ++$(objpfx)tst-glibc-hwcaps: \ ++ $(objpfx)libmarkermod2-1.so \ ++ $(objpfx)libmarkermod3-1.so \ ++ $(objpfx)libmarkermod4-1.so \ ++ $(objpfx)libmarkermod5-1.so + $(objpfx)tst-glibc-hwcaps.out: \ + $(objpfx)libmarkermod2.so \ + $(objpfx)glibc-hwcaps/z13/libmarkermod2.so \ +@@ -19,6 +22,11 @@ $(objpfx)tst-glibc-hwcaps.out: \ + $(objpfx)glibc-hwcaps/z13/libmarkermod4.so \ + $(objpfx)glibc-hwcaps/z14/libmarkermod4.so \ + $(objpfx)glibc-hwcaps/z15/libmarkermod4.so \ ++ $(objpfx)libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z13/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z14/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z15/libmarkermod5.so \ ++ $(objpfx)glibc-hwcaps/z16/libmarkermod5.so + + $(objpfx)glibc-hwcaps/z13/libmarkermod2.so: $(objpfx)libmarkermod2-2.so + $(make-target-directory) +@@ -38,6 +46,19 @@ $(objpfx)glibc-hwcaps/z14/libmarkermod4.so: $(objpfx)libmarkermod4-3.so + $(objpfx)glibc-hwcaps/z15/libmarkermod4.so: $(objpfx)libmarkermod4-4.so + $(make-target-directory) + cp $< $@ ++$(objpfx)glibc-hwcaps/z13/libmarkermod5.so: $(objpfx)libmarkermod5-2.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z14/libmarkermod5.so: $(objpfx)libmarkermod5-3.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z15/libmarkermod5.so: $(objpfx)libmarkermod5-4.so ++ $(make-target-directory) ++ cp $< $@ ++$(objpfx)glibc-hwcaps/z16/libmarkermod5.so: $(objpfx)libmarkermod5-5.so ++ $(make-target-directory) ++ cp $< $@ ++ + + ifeq (no,$(build-hardcoded-path-in-tests)) + # This is an ld.so.cache test, and RPATH/RUNPATH in the executable +diff --git a/sysdeps/s390/s390-64/dl-hwcap-check.h b/sysdeps/s390/s390-64/dl-hwcap-check.h +index 27f7e245b1d1a9e9..52c609571b32f4ab 100644 +--- a/sysdeps/s390/s390-64/dl-hwcap-check.h ++++ b/sysdeps/s390/s390-64/dl-hwcap-check.h +@@ -26,7 +26,11 @@ static inline void + dl_hwcap_check (void) + { + #if defined __ARCH__ +-# if GCCMACRO__ARCH__ >= 13 ++# if GCCMACRO__ARCH__ >= 14 ++ if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2)) ++ _dl_fatal_printf ("\ ++Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n"); ++# elif GCCMACRO__ARCH__ >= 13 + if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2)) + _dl_fatal_printf ("\ + Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n"); +diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c +index b9d094f3d73c2d7a..187d732d560c4a62 100644 +--- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c ++++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c +@@ -19,8 +19,8 @@ + #include + #include + +-const char _dl_hwcaps_subdirs[] = "z15:z14:z13"; +-enum { subdirs_count = 3 }; /* Number of components in _dl_hwcaps_subdirs. */ ++const char _dl_hwcaps_subdirs[] = "z16:z15:z14:z13"; ++enum { subdirs_count = 4 }; /* Number of components in _dl_hwcaps_subdirs. */ + + uint32_t + _dl_hwcaps_subdirs_active (void) +@@ -50,5 +50,12 @@ _dl_hwcaps_subdirs_active (void) + return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active); + ++active; + ++ /* z16. ++ Note: We do not list HWCAP_S390_NNPA here as, according to the Principles of ++ Operation, those instructions may be replaced or removed in future. */ ++ if (!(GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE2)) ++ return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active); ++ ++active; ++ + return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active); + } +diff --git a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c +index 02397a478c552516..f3b8ef3dec80d2d1 100644 +--- a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c ++++ b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c +@@ -25,6 +25,7 @@ + extern int marker2 (void); + extern int marker3 (void); + extern int marker4 (void); ++extern int marker5 (void); + + /* Return the arch level, 10 for the baseline libmarkermod*.so's. */ + static int +@@ -63,9 +64,11 @@ compute_level (void) + return 12; + if (strcmp (platform, "z15") == 0) + return 13; ++ if (strcmp (platform, "z16") == 0) ++ return 14; + printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform); +- /* Assume that the new platform supports z15. */ +- return 13; ++ /* Assume that the new platform supports z16. */ ++ return 14; + } + + static int +@@ -76,6 +79,7 @@ do_test (void) + TEST_COMPARE (marker2 (), MIN (level - 9, 2)); + TEST_COMPARE (marker3 (), MIN (level - 9, 3)); + TEST_COMPARE (marker4 (), MIN (level - 9, 4)); ++ TEST_COMPARE (marker5 (), MIN (level - 9, 5)); + return 0; + } + diff --git a/glibc-upstream-2.34-163.patch b/glibc-upstream-2.34-163.patch new file mode 100644 index 0000000..251939d --- /dev/null +++ b/glibc-upstream-2.34-163.patch @@ -0,0 +1,834 @@ +commit 290db09546b260a30137d03ce97a857e6f15b648 +Author: Adhemerval Zanella +Date: Wed Apr 6 12:24:42 2022 -0300 + + nptl: Handle spurious EINTR when thread cancellation is disabled (BZ#29029) + + Some Linux interfaces never restart after being interrupted by a signal + handler, regardless of the use of SA_RESTART [1]. It means that for + pthread cancellation, if the target thread disables cancellation with + pthread_setcancelstate and calls such interfaces (like poll or select), + it should not see spurious EINTR failures due the internal SIGCANCEL. + + However recent changes made pthread_cancel to always sent the internal + signal, regardless of the target thread cancellation status or type. + To fix it, the previous semantic is restored, where the cancel signal + is only sent if the target thread has cancelation enabled in + asynchronous mode. + + The cancel state and cancel type is moved back to cancelhandling + and atomic operation are used to synchronize between threads. The + patch essentially revert the following commits: + + 8c1c0aae20 nptl: Move cancel type out of cancelhandling + 2b51742531 nptl: Move cancel state out of cancelhandling + 26cfbb7162 nptl: Remove CANCELING_BITMASK + + However I changed the atomic operation to follow the internal C11 + semantic and removed the MACRO usage, it simplifies a bit the + resulting code (and removes another usage of the old atomic macros). + + Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, + and powerpc64-linux-gnu. + + [1] https://man7.org/linux/man-pages/man7/signal.7.html + + Reviewed-by: Florian Weimer + Tested-by: Aurelien Jarno + + (cherry-picked from commit 404656009b459658138ed1bd18f3c6cf3863e6a6) + +diff --git a/manual/process.texi b/manual/process.texi +index 28c9531f4294f56e..9307379194c6f666 100644 +--- a/manual/process.texi ++++ b/manual/process.texi +@@ -68,8 +68,7 @@ until the subprogram terminates before you can do anything else. + @c CLEANUP_HANDLER @ascuplugin @ascuheap @acsmem + @c libc_cleanup_region_start @ascuplugin @ascuheap @acsmem + @c pthread_cleanup_push_defer @ascuplugin @ascuheap @acsmem +-@c __pthread_testcancel @ascuplugin @ascuheap @acsmem +-@c CANCEL_ENABLED_AND_CANCELED ok ++@c cancel_enabled_and_canceled @ascuplugin @ascuheap @acsmem + @c do_cancel @ascuplugin @ascuheap @acsmem + @c cancel_handler ok + @c kill syscall ok +diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c +index 554a721f814b53c4..96101753ec2f4323 100644 +--- a/nptl/allocatestack.c ++++ b/nptl/allocatestack.c +@@ -120,8 +120,6 @@ get_cached_stack (size_t *sizep, void **memp) + + /* Cancellation handling is back to the default. */ + result->cancelhandling = 0; +- result->cancelstate = PTHREAD_CANCEL_ENABLE; +- result->canceltype = PTHREAD_CANCEL_DEFERRED; + result->cleanup = NULL; + result->setup_failed = 0; + +diff --git a/nptl/cancellation.c b/nptl/cancellation.c +index 05962784d51fb98b..e97d56f97d7a5698 100644 +--- a/nptl/cancellation.c ++++ b/nptl/cancellation.c +@@ -31,19 +31,26 @@ int + __pthread_enable_asynccancel (void) + { + struct pthread *self = THREAD_SELF; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); + +- int oldval = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_ASYNCHRONOUS); ++ while (1) ++ { ++ int newval = oldval | CANCELTYPE_BITMASK; + +- int ch = THREAD_GETMEM (self, cancelhandling); ++ if (newval == oldval) ++ break; + +- if (self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (ch & CANCELED_BITMASK) +- && !(ch & EXITING_BITMASK) +- && !(ch & TERMINATED_BITMASK)) +- { +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); +- __do_cancel (); ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ ++ break; ++ } + } + + return oldval; +@@ -57,10 +64,29 @@ __pthread_disable_asynccancel (int oldtype) + { + /* If asynchronous cancellation was enabled before we do not have + anything to do. */ +- if (oldtype == PTHREAD_CANCEL_ASYNCHRONOUS) ++ if (oldtype & CANCELTYPE_BITMASK) + return; + + struct pthread *self = THREAD_SELF; +- self->canceltype = PTHREAD_CANCEL_DEFERRED; ++ int newval; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ do ++ { ++ newval = oldval & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)); ++ ++ /* We cannot return when we are being canceled. Upon return the ++ thread might be things which would have to be undone. The ++ following loop should loop until the cancellation signal is ++ delivered. */ ++ while (__glibc_unlikely ((newval & (CANCELING_BITMASK | CANCELED_BITMASK)) ++ == CANCELING_BITMASK)) ++ { ++ futex_wait_simple ((unsigned int *) &self->cancelhandling, newval, ++ FUTEX_PRIVATE); ++ newval = atomic_load_relaxed (&self->cancelhandling); ++ } + } + libc_hidden_def (__pthread_disable_asynccancel) +diff --git a/nptl/cleanup_defer.c b/nptl/cleanup_defer.c +index 7e858d0df068276b..35ba40fb0247c7cc 100644 +--- a/nptl/cleanup_defer.c ++++ b/nptl/cleanup_defer.c +@@ -31,9 +31,22 @@ ___pthread_register_cancel_defer (__pthread_unwind_buf_t *buf) + ibuf->priv.data.prev = THREAD_GETMEM (self, cleanup_jmp_buf); + ibuf->priv.data.cleanup = THREAD_GETMEM (self, cleanup); + +- /* Disable asynchronous cancellation for now. */ +- ibuf->priv.data.canceltype = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED); ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK)) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, ++ newval)); ++ } ++ ++ ibuf->priv.data.canceltype = (cancelhandling & CANCELTYPE_BITMASK ++ ? PTHREAD_CANCEL_ASYNCHRONOUS ++ : PTHREAD_CANCEL_DEFERRED); + + /* Store the new cleanup handler info. */ + THREAD_SETMEM (self, cleanup_jmp_buf, (struct pthread_unwind_buf *) buf); +@@ -55,9 +68,26 @@ ___pthread_unregister_cancel_restore (__pthread_unwind_buf_t *buf) + + THREAD_SETMEM (self, cleanup_jmp_buf, ibuf->priv.data.prev); + +- THREAD_SETMEM (self, canceltype, ibuf->priv.data.canceltype); +- if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_DEFERRED) ++ return; ++ ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancelhandling & CANCELTYPE_BITMASK) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling | CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, newval)); ++ ++ if (cancel_enabled_and_canceled (cancelhandling)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ } + } + versioned_symbol (libc, ___pthread_unregister_cancel_restore, + __pthread_unregister_cancel_restore, GLIBC_2_34); +diff --git a/nptl/descr.h b/nptl/descr.h +index dabf980e29615db3..dfef9c4bda075d13 100644 +--- a/nptl/descr.h ++++ b/nptl/descr.h +@@ -280,18 +280,27 @@ struct pthread + + /* Flags determining processing of cancellation. */ + int cancelhandling; ++ /* Bit set if cancellation is disabled. */ ++#define CANCELSTATE_BIT 0 ++#define CANCELSTATE_BITMASK (1 << CANCELSTATE_BIT) ++ /* Bit set if asynchronous cancellation mode is selected. */ ++#define CANCELTYPE_BIT 1 ++#define CANCELTYPE_BITMASK (1 << CANCELTYPE_BIT) ++ /* Bit set if canceling has been initiated. */ ++#define CANCELING_BIT 2 ++#define CANCELING_BITMASK (1 << CANCELING_BIT) + /* Bit set if canceled. */ + #define CANCELED_BIT 3 +-#define CANCELED_BITMASK (0x01 << CANCELED_BIT) ++#define CANCELED_BITMASK (1 << CANCELED_BIT) + /* Bit set if thread is exiting. */ + #define EXITING_BIT 4 +-#define EXITING_BITMASK (0x01 << EXITING_BIT) ++#define EXITING_BITMASK (1 << EXITING_BIT) + /* Bit set if thread terminated and TCB is freed. */ + #define TERMINATED_BIT 5 +-#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT) ++#define TERMINATED_BITMASK (1 << TERMINATED_BIT) + /* Bit set if thread is supposed to change XID. */ + #define SETXID_BIT 6 +-#define SETXID_BITMASK (0x01 << SETXID_BIT) ++#define SETXID_BITMASK (1 << SETXID_BIT) + + /* Flags. Including those copied from the thread attribute. */ + int flags; +@@ -391,14 +400,6 @@ struct pthread + /* Indicates whether is a C11 thread created by thrd_creat. */ + bool c11; + +- /* Thread cancel state (PTHREAD_CANCEL_ENABLE or +- PTHREAD_CANCEL_DISABLE). */ +- unsigned char cancelstate; +- +- /* Thread cancel type (PTHREAD_CANCEL_DEFERRED or +- PTHREAD_CANCEL_ASYNCHRONOUS). */ +- unsigned char canceltype; +- + /* Used in __pthread_kill_internal to detected a thread that has + exited or is about to exit. exit_lock must only be acquired + after blocking signals. */ +@@ -418,6 +419,22 @@ struct pthread + (sizeof (struct pthread) - offsetof (struct pthread, end_padding)) + } __attribute ((aligned (TCB_ALIGNMENT))); + ++static inline bool ++cancel_enabled_and_canceled (int value) ++{ ++ return (value & (CANCELSTATE_BITMASK | CANCELED_BITMASK | EXITING_BITMASK ++ | TERMINATED_BITMASK)) ++ == CANCELED_BITMASK; ++} ++ ++static inline bool ++cancel_enabled_and_canceled_and_async (int value) ++{ ++ return ((value) & (CANCELSTATE_BITMASK | CANCELTYPE_BITMASK | CANCELED_BITMASK ++ | EXITING_BITMASK | TERMINATED_BITMASK)) ++ == (CANCELTYPE_BITMASK | CANCELED_BITMASK); ++} ++ + /* This yields the pointer that TLS support code calls the thread pointer. */ + #if TLS_TCB_AT_TP + # define TLS_TPADJ(pd) (pd) +diff --git a/nptl/libc-cleanup.c b/nptl/libc-cleanup.c +index 180d15bc9e9a8368..fccb1abe69aa693c 100644 +--- a/nptl/libc-cleanup.c ++++ b/nptl/libc-cleanup.c +@@ -27,9 +27,24 @@ __libc_cleanup_push_defer (struct _pthread_cleanup_buffer *buffer) + + buffer->__prev = THREAD_GETMEM (self, cleanup); + ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ + /* Disable asynchronous cancellation for now. */ +- buffer->__canceltype = THREAD_GETMEM (self, canceltype); +- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED); ++ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK)) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling & ~CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, ++ newval)); ++ } ++ ++ buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK ++ ? PTHREAD_CANCEL_ASYNCHRONOUS ++ : PTHREAD_CANCEL_DEFERRED); + + THREAD_SETMEM (self, cleanup, buffer); + } +@@ -42,8 +57,22 @@ __libc_cleanup_pop_restore (struct _pthread_cleanup_buffer *buffer) + + THREAD_SETMEM (self, cleanup, buffer->__prev); + +- THREAD_SETMEM (self, canceltype, buffer->__canceltype); +- if (buffer->__canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancelhandling & CANCELTYPE_BITMASK) ++ { ++ int newval; ++ do ++ { ++ newval = cancelhandling | CANCELTYPE_BITMASK; ++ } ++ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &cancelhandling, newval)); ++ ++ if (cancel_enabled_and_canceled (cancelhandling)) ++ { ++ self->result = PTHREAD_CANCELED; ++ __do_cancel (); ++ } ++ } + } + libc_hidden_def (__libc_cleanup_pop_restore) +diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c +index 9bac6e3b76a20312..2680b55586e035fe 100644 +--- a/nptl/pthread_cancel.c ++++ b/nptl/pthread_cancel.c +@@ -43,18 +43,29 @@ sigcancel_handler (int sig, siginfo_t *si, void *ctx) + + struct pthread *self = THREAD_SELF; + +- int ch = atomic_load_relaxed (&self->cancelhandling); +- /* Cancelation not enabled, not cancelled, or already exitting. */ +- if (self->cancelstate == PTHREAD_CANCEL_DISABLE +- || (ch & CANCELED_BITMASK) == 0 +- || (ch & EXITING_BITMASK) != 0) +- return; +- +- /* Set the return value. */ +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); +- /* Make sure asynchronous cancellation is still enabled. */ +- if (self->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __do_cancel (); ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ /* We are canceled now. When canceled by another thread this flag ++ is already set but if the signal is directly send (internally or ++ from another process) is has to be done here. */ ++ int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; ++ ++ if (oldval == newval || (oldval & EXITING_BITMASK) != 0) ++ /* Already canceled or exiting. */ ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ self->result = PTHREAD_CANCELED; ++ ++ /* Make sure asynchronous cancellation is still enabled. */ ++ if ((oldval & CANCELTYPE_BITMASK) != 0) ++ /* Run the registered destructors and terminate the thread. */ ++ __do_cancel (); ++ } ++ } + } + + int +@@ -93,29 +104,70 @@ __pthread_cancel (pthread_t th) + } + #endif + +- int oldch = atomic_fetch_or_acquire (&pd->cancelhandling, CANCELED_BITMASK); +- if ((oldch & CANCELED_BITMASK) != 0) +- return 0; +- +- if (pd == THREAD_SELF) ++ /* Some syscalls are never restarted after being interrupted by a signal ++ handler, regardless of the use of SA_RESTART (they always fail with ++ EINTR). So pthread_cancel cannot send SIGCANCEL unless the cancellation ++ is enabled and set as asynchronous (in this case the cancellation will ++ be acted in the cancellation handler instead by the syscall wrapper). ++ Otherwise the target thread is set as 'cancelling' (CANCELING_BITMASK) ++ by atomically setting 'cancelhandling' and the cancelation will be acted ++ upon on next cancellation entrypoing in the target thread. ++ ++ It also requires to atomically check if cancellation is enabled and ++ asynchronous, so both cancellation state and type are tracked on ++ 'cancelhandling'. */ ++ ++ int result = 0; ++ int oldval = atomic_load_relaxed (&pd->cancelhandling); ++ int newval; ++ do + { +- /* A single-threaded process should be able to kill itself, since there +- is nothing in the POSIX specification that says that it cannot. So +- we set multiple_threads to true so that cancellation points get +- executed. */ +- THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1); ++ newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; ++ if (oldval == newval) ++ break; ++ ++ /* If the cancellation is handled asynchronously just send a ++ signal. We avoid this if possible since it's more ++ expensive. */ ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ /* Mark the cancellation as "in progress". */ ++ int newval2 = oldval | CANCELING_BITMASK; ++ if (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, ++ &oldval, newval2)) ++ continue; ++ ++ if (pd == THREAD_SELF) ++ /* This is not merely an optimization: An application may ++ call pthread_cancel (pthread_self ()) without calling ++ pthread_create, so the signal handler may not have been ++ set up for a self-cancel. */ ++ { ++ pd->result = PTHREAD_CANCELED; ++ if ((newval & CANCELTYPE_BITMASK) != 0) ++ __do_cancel (); ++ } ++ else ++ /* The cancellation handler will take care of marking the ++ thread as canceled. */ ++ result = __pthread_kill_internal (th, SIGCANCEL); ++ ++ break; ++ } ++ ++ /* A single-threaded process should be able to kill itself, since ++ there is nothing in the POSIX specification that says that it ++ cannot. So we set multiple_threads to true so that cancellation ++ points get executed. */ ++ THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1); + #ifndef TLS_MULTIPLE_THREADS_IN_TCB + __libc_multiple_threads = 1; + #endif +- +- THREAD_SETMEM (pd, result, PTHREAD_CANCELED); +- if (pd->cancelstate == PTHREAD_CANCEL_ENABLE +- && pd->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS) +- __do_cancel (); +- return 0; + } ++ while (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, &oldval, ++ newval)); + +- return __pthread_kill_internal (th, SIGCANCEL); ++ return result; + } + versioned_symbol (libc, __pthread_cancel, pthread_cancel, GLIBC_2_34); + +diff --git a/nptl/pthread_join_common.c b/nptl/pthread_join_common.c +index 7303069316caef13..617056ef10671607 100644 +--- a/nptl/pthread_join_common.c ++++ b/nptl/pthread_join_common.c +@@ -57,12 +57,9 @@ __pthread_clockjoin_ex (pthread_t threadid, void **thread_return, + if ((pd == self + || (self->joinid == pd + && (pd->cancelhandling +- & (CANCELED_BITMASK | EXITING_BITMASK ++ & (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK + | TERMINATED_BITMASK)) == 0)) +- && !(self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (pd->cancelhandling & (CANCELED_BITMASK | EXITING_BITMASK +- | TERMINATED_BITMASK)) +- == CANCELED_BITMASK)) ++ && !cancel_enabled_and_canceled (self->cancelhandling)) + /* This is a deadlock situation. The threads are waiting for each + other to finish. Note that this is a "may" error. To be 100% + sure we catch this error we would have to lock the data +diff --git a/nptl/pthread_setcancelstate.c b/nptl/pthread_setcancelstate.c +index 7e2b6e4974bd58bd..cb567be5926816f1 100644 +--- a/nptl/pthread_setcancelstate.c ++++ b/nptl/pthread_setcancelstate.c +@@ -31,9 +31,29 @@ __pthread_setcancelstate (int state, int *oldstate) + + self = THREAD_SELF; + +- if (oldstate != NULL) +- *oldstate = self->cancelstate; +- self->cancelstate = state; ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ int newval = (state == PTHREAD_CANCEL_DISABLE ++ ? oldval | CANCELSTATE_BITMASK ++ : oldval & ~CANCELSTATE_BITMASK); ++ ++ if (oldstate != NULL) ++ *oldstate = ((oldval & CANCELSTATE_BITMASK) ++ ? PTHREAD_CANCEL_DISABLE : PTHREAD_CANCEL_ENABLE); ++ ++ if (oldval == newval) ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ __do_cancel (); ++ ++ break; ++ } ++ } + + return 0; + } +diff --git a/nptl/pthread_setcanceltype.c b/nptl/pthread_setcanceltype.c +index e7b24ae733dcc0f2..e08ff7b141f904f1 100644 +--- a/nptl/pthread_setcanceltype.c ++++ b/nptl/pthread_setcanceltype.c +@@ -29,11 +29,32 @@ __pthread_setcanceltype (int type, int *oldtype) + + volatile struct pthread *self = THREAD_SELF; + +- if (oldtype != NULL) +- *oldtype = self->canceltype; +- self->canceltype = type; +- if (type == PTHREAD_CANCEL_ASYNCHRONOUS) +- __pthread_testcancel (); ++ int oldval = atomic_load_relaxed (&self->cancelhandling); ++ while (1) ++ { ++ int newval = (type == PTHREAD_CANCEL_ASYNCHRONOUS ++ ? oldval | CANCELTYPE_BITMASK ++ : oldval & ~CANCELTYPE_BITMASK); ++ ++ if (oldtype != NULL) ++ *oldtype = ((oldval & CANCELTYPE_BITMASK) ++ ? PTHREAD_CANCEL_ASYNCHRONOUS : PTHREAD_CANCEL_DEFERRED); ++ ++ if (oldval == newval) ++ break; ++ ++ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling, ++ &oldval, newval)) ++ { ++ if (cancel_enabled_and_canceled_and_async (newval)) ++ { ++ THREAD_SETMEM (self, result, PTHREAD_CANCELED); ++ __do_cancel (); ++ } ++ ++ break; ++ } ++ } + + return 0; + } +diff --git a/nptl/pthread_testcancel.c b/nptl/pthread_testcancel.c +index 31185d89f2ab84c6..25230215fd607e8b 100644 +--- a/nptl/pthread_testcancel.c ++++ b/nptl/pthread_testcancel.c +@@ -24,13 +24,10 @@ void + ___pthread_testcancel (void) + { + struct pthread *self = THREAD_SELF; +- int cancelhandling = THREAD_GETMEM (self, cancelhandling); +- if (self->cancelstate == PTHREAD_CANCEL_ENABLE +- && (cancelhandling & CANCELED_BITMASK) +- && !(cancelhandling & EXITING_BITMASK) +- && !(cancelhandling & TERMINATED_BITMASK)) ++ int cancelhandling = atomic_load_relaxed (&self->cancelhandling); ++ if (cancel_enabled_and_canceled (cancelhandling)) + { +- THREAD_SETMEM (self, result, PTHREAD_CANCELED); ++ self->result = PTHREAD_CANCELED; + __do_cancel (); + } + } +diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c +index b39dfbff2c6678d5..23aa4cfc0b784dfc 100644 +--- a/sysdeps/nptl/dl-tls_init_tp.c ++++ b/sysdeps/nptl/dl-tls_init_tp.c +@@ -107,7 +107,4 @@ __tls_init_tp (void) + It will be bigger than it actually is, but for unwind.c/pt-longjmp.c + purposes this is good enough. */ + THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end); +- +- THREAD_SETMEM (pd, cancelstate, PTHREAD_CANCEL_ENABLE); +- THREAD_SETMEM (pd, canceltype, PTHREAD_CANCEL_DEFERRED); + } +diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h +index 374657a2fd0ee19a..b968afc4c6b61b92 100644 +--- a/sysdeps/nptl/pthreadP.h ++++ b/sysdeps/nptl/pthreadP.h +@@ -276,7 +276,7 @@ __do_cancel (void) + struct pthread *self = THREAD_SELF; + + /* Make sure we get no more cancellations. */ +- THREAD_ATOMIC_BIT_SET (self, cancelhandling, EXITING_BIT); ++ atomic_bit_set (&self->cancelhandling, EXITING_BIT); + + __pthread_unwind ((__pthread_unwind_buf_t *) + THREAD_GETMEM (self, cleanup_jmp_buf)); +diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile +index c65710169697ad95..00419c4d199df912 100644 +--- a/sysdeps/pthread/Makefile ++++ b/sysdeps/pthread/Makefile +@@ -69,6 +69,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ + tst-cancel12 tst-cancel13 tst-cancel14 tst-cancel15 tst-cancel16 \ + tst-cancel18 tst-cancel19 tst-cancel20 tst-cancel21 \ + tst-cancel22 tst-cancel23 tst-cancel26 tst-cancel27 tst-cancel28 \ ++ tst-cancel29 \ + tst-cleanup0 tst-cleanup1 tst-cleanup2 tst-cleanup3 \ + tst-clock1 \ + tst-cond-except \ +diff --git a/sysdeps/pthread/tst-cancel29.c b/sysdeps/pthread/tst-cancel29.c +new file mode 100644 +index 0000000000000000..4f0d99e002883be4 +--- /dev/null ++++ b/sysdeps/pthread/tst-cancel29.c +@@ -0,0 +1,207 @@ ++/* Check if a thread that disables cancellation and which call functions ++ that might be interrupted by a signal do not see the internal SIGCANCEL. ++ ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* On Linux some interfaces are never restarted after being interrupted by ++ a signal handler, regardless of the use of SA_RESTART. It means that ++ if asynchronous cancellation is not enabled, the pthread_cancel can not ++ set the internal SIGCANCEL otherwise the interface might see a spurious ++ EINTR failure. */ ++ ++static pthread_barrier_t b; ++ ++/* Cleanup handling test. */ ++static int cl_called; ++static void ++cl (void *arg) ++{ ++ ++cl_called; ++} ++ ++static void * ++tf_sigtimedwait (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ sigset_t mask; ++ sigemptyset (&mask); ++ r = sigtimedwait (&mask, NULL, &(struct timespec) { 0, 250000000 }); ++ if (r != -1) ++ return (void*) -1; ++ if (errno != EAGAIN) ++ return (void*) -2; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_poll (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = poll (NULL, 0, 250); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_ppoll (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = ppoll (NULL, 0, &(struct timespec) { 0, 250000000 }, NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_select (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = select (0, NULL, NULL, NULL, &(struct timeval) { 0, 250000 }); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_pselect (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = pselect (0, NULL, NULL, NULL, &(struct timespec) { 0, 250000000 }, NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++static void * ++tf_clock_nanosleep (void *arg) ++{ ++ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); ++ xpthread_barrier_wait (&b); ++ ++ int r; ++ pthread_cleanup_push (cl, NULL); ++ ++ r = clock_nanosleep (CLOCK_REALTIME, 0, &(struct timespec) { 0, 250000000 }, ++ NULL); ++ if (r != 0) ++ return (void*) -1; ++ ++ pthread_cleanup_pop (0); ++ return NULL; ++} ++ ++struct cancel_test_t ++{ ++ const char *name; ++ void * (*cf) (void *); ++} tests[] = ++{ ++ { "sigtimedwait", tf_sigtimedwait, }, ++ { "poll", tf_poll, }, ++ { "ppoll", tf_ppoll, }, ++ { "select", tf_select, }, ++ { "pselect", tf_pselect , }, ++ { "clock_nanosleep", tf_clock_nanosleep, }, ++}; ++ ++static int ++do_test (void) ++{ ++ for (int i = 0; i < array_length (tests); i++) ++ { ++ xpthread_barrier_init (&b, NULL, 2); ++ ++ cl_called = 0; ++ ++ pthread_t th = xpthread_create (NULL, tests[i].cf, NULL); ++ ++ xpthread_barrier_wait (&b); ++ ++ struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 }; ++ while (nanosleep (&ts, &ts) != 0) ++ continue; ++ ++ xpthread_cancel (th); ++ ++ void *status = xpthread_join (th); ++ if (status != NULL) ++ printf ("test '%s' failed: %" PRIdPTR "\n", tests[i].name, ++ (intptr_t) status); ++ TEST_VERIFY (status == NULL); ++ ++ xpthread_barrier_destroy (&b); ++ ++ TEST_COMPARE (cl_called, 0); ++ ++ printf ("in-time cancel test of '%s' successful\n", tests[i].name); ++ } ++ ++ return 0; ++} ++ ++#include diff --git a/glibc-upstream-2.34-164.patch b/glibc-upstream-2.34-164.patch new file mode 100644 index 0000000..dbe230b --- /dev/null +++ b/glibc-upstream-2.34-164.patch @@ -0,0 +1,23 @@ +commit 5d8c7776343b3f1b96ef7777e4504378f23c041a +Author: Samuel Thibault +Date: Tue Apr 12 22:14:34 2022 +0200 + + hurd: Fix arbitrary error code + + ELIBBAD is Linux-specific. + + (cherry picked from commit 67ab66541dc1164540abda284645e38be90b5119) + +diff --git a/nss/nss_test_errno.c b/nss/nss_test_errno.c +index 680f8a07b97fe263..59a5c717bebd296f 100644 +--- a/nss/nss_test_errno.c ++++ b/nss/nss_test_errno.c +@@ -28,7 +28,7 @@ static void __attribute__ ((constructor)) + init (void) + { + /* An arbitrary error code which is otherwise not used. */ +- errno = ELIBBAD; ++ errno = -1009; + } + + /* Lookup functions for pwd follow that do not return any data. */ diff --git a/glibc-upstream-2.34-165.patch b/glibc-upstream-2.34-165.patch new file mode 100644 index 0000000..1bab4fc --- /dev/null +++ b/glibc-upstream-2.34-165.patch @@ -0,0 +1,104 @@ +commit b87b697f15d6bf7e576a2eeadc1f740172f9d013 +Author: =Joshua Kinard +Date: Mon Apr 18 09:55:08 2022 -0300 + + mips: Fix mips64n32 64 bit time_t stat support (BZ#29069) + + Add missing support initially added by 4e8521333bea6e89fcef1020 + (which missed n32 stat). + + (cherry picked from commit 78fb88827362fbd2cc8aa32892ae5b015106e25c) + +diff --git a/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h b/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h +index ab9f474cbc271b7c..ed5b1bc00ba52406 100644 +--- a/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h ++++ b/sysdeps/unix/sysv/linux/mips/bits/struct_stat.h +@@ -131,27 +131,30 @@ struct stat64 + + struct stat + { ++# ifdef __USE_TIME_BITS64 ++# include ++# else + __dev_t st_dev; + int st_pad1[3]; /* Reserved for st_dev expansion */ +-# ifndef __USE_FILE_OFFSET64 ++# ifndef __USE_FILE_OFFSET64 + __ino_t st_ino; +-# else ++# else + __ino64_t st_ino; +-# endif ++# endif + __mode_t st_mode; + __nlink_t st_nlink; + __uid_t st_uid; + __gid_t st_gid; + __dev_t st_rdev; +-# if !defined __USE_FILE_OFFSET64 ++# if !defined __USE_FILE_OFFSET64 + unsigned int st_pad2[2]; /* Reserved for st_rdev expansion */ + __off_t st_size; + int st_pad3; +-# else ++# else + unsigned int st_pad2[3]; /* Reserved for st_rdev expansion */ + __off64_t st_size; +-# endif +-# ifdef __USE_XOPEN2K8 ++# endif ++# ifdef __USE_XOPEN2K8 + /* Nanosecond resolution timestamps are stored in a format + equivalent to 'struct timespec'. This is the type used + whenever possible but the Unix namespace rules do not allow the +@@ -161,30 +164,34 @@ struct stat + struct timespec st_atim; /* Time of last access. */ + struct timespec st_mtim; /* Time of last modification. */ + struct timespec st_ctim; /* Time of last status change. */ +-# define st_atime st_atim.tv_sec /* Backward compatibility. */ +-# define st_mtime st_mtim.tv_sec +-# define st_ctime st_ctim.tv_sec +-# else ++# define st_atime st_atim.tv_sec /* Backward compatibility. */ ++# define st_mtime st_mtim.tv_sec ++# define st_ctime st_ctim.tv_sec ++# else + __time_t st_atime; /* Time of last access. */ + unsigned long int st_atimensec; /* Nscecs of last access. */ + __time_t st_mtime; /* Time of last modification. */ + unsigned long int st_mtimensec; /* Nsecs of last modification. */ + __time_t st_ctime; /* Time of last status change. */ + unsigned long int st_ctimensec; /* Nsecs of last status change. */ +-# endif ++# endif + __blksize_t st_blksize; + unsigned int st_pad4; +-# ifndef __USE_FILE_OFFSET64 ++# ifndef __USE_FILE_OFFSET64 + __blkcnt_t st_blocks; +-# else ++# else + __blkcnt64_t st_blocks; +-# endif ++# endif + int st_pad5[14]; ++# endif + }; + + #ifdef __USE_LARGEFILE64 + struct stat64 + { ++# ifdef __USE_TIME_BITS64 ++# include ++# else + __dev_t st_dev; + unsigned int st_pad1[3]; /* Reserved for st_dev expansion */ + __ino64_t st_ino; +@@ -217,6 +224,7 @@ struct stat64 + unsigned int st_pad3; + __blkcnt64_t st_blocks; + int st_pad4[14]; ++# endif /* __USE_TIME_BITS64 */ + }; + #endif + diff --git a/glibc-upstream-2.34-166.patch b/glibc-upstream-2.34-166.patch new file mode 100644 index 0000000..c2db463 --- /dev/null +++ b/glibc-upstream-2.34-166.patch @@ -0,0 +1,35 @@ +commit 71326f1f2fd09dafb9c34404765fb88129e94237 +Author: Adhemerval Zanella +Date: Wed Apr 20 12:01:43 2022 -0300 + + nptl: Fix pthread_cancel cancelhandling atomic operations + + The 404656009b reversion did not setup the atomic loop to set the + cancel bits correctly. The fix is essentially what pthread_cancel + did prior 26cfbb7162ad. + + Checked on x86_64-linux-gnu and aarch64-linux-gnu. + + (cherry picked from commit 62be9681677e7ce820db721c126909979382d379) + +diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c +index 2680b55586e035fe..64fd183fde59907b 100644 +--- a/nptl/pthread_cancel.c ++++ b/nptl/pthread_cancel.c +@@ -122,6 +122,7 @@ __pthread_cancel (pthread_t th) + int newval; + do + { ++ again: + newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK; + if (oldval == newval) + break; +@@ -135,7 +136,7 @@ __pthread_cancel (pthread_t th) + int newval2 = oldval | CANCELING_BITMASK; + if (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, + &oldval, newval2)) +- continue; ++ goto again; + + if (pd == THREAD_SELF) + /* This is not merely an optimization: An application may diff --git a/glibc-upstream-2.34-167.patch b/glibc-upstream-2.34-167.patch new file mode 100644 index 0000000..e00042d --- /dev/null +++ b/glibc-upstream-2.34-167.patch @@ -0,0 +1,1446 @@ +commit 3e0a91b79b409a6a4113a0fdb08221c0bb29cfce +Author: Florian Weimer +Date: Mon Apr 11 11:28:08 2022 +0200 + + scripts: Add glibcelf.py module + + Hopefully, this will lead to tests that are easier to maintain. The + current approach of parsing readelf -W output using regular expressions + is not necessarily easier than parsing the ELF data directly. + + This module is still somewhat incomplete (e.g., coverage of relocation + types and versioning information is missing), but it is sufficient to + perform basic symbol analysis or program header analysis. + + The EM_* mapping for architecture-specific constant classes (e.g., + SttX86_64) is not yet implemented. The classes are defined for the + benefit of elf/tst-glibcelf.py. + + Reviewed-by: Siddhesh Poyarekar + (cherry picked from commit 30035d67728a846fa39749cd162afd278ac654c4) + +diff --git a/elf/Makefile b/elf/Makefile +index 8e2dd91c583f9a62..8afbe3f6ab259331 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -1053,6 +1053,13 @@ CFLAGS-tst-prelink.c += -fno-pie + tst-prelink-no-pie = yes + endif + ++tests-special += $(objpfx)tst-glibcelf.out ++$(objpfx)tst-glibcelf.out: tst-glibcelf.py elf.h $(..)/scripts/glibcelf.py \ ++ $(..)/scripts/glibcextract.py ++ PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcelf.py \ ++ --cc="$(CC) $(patsubst -DMODULE_NAME=%,-DMODULE_NAME=testsuite,$(CPPFLAGS))" \ ++ < /dev/null > $@ 2>&1; $(evaluate-test) ++ + # The test requires shared _and_ PIE because the executable + # unit test driver must be able to link with the shared object + # that is going to eventually go into an installed DSO. +diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py +new file mode 100644 +index 0000000000000000..bf15a3bad4479e08 +--- /dev/null ++++ b/elf/tst-glibcelf.py +@@ -0,0 +1,260 @@ ++#!/usr/bin/python3 ++# Verify scripts/glibcelf.py contents against elf/elf.h. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++import argparse ++import enum ++import sys ++ ++import glibcelf ++import glibcextract ++ ++errors_encountered = 0 ++ ++def error(message): ++ global errors_encountered ++ sys.stdout.write('error: {}\n'.format(message)) ++ errors_encountered += 1 ++ ++# The enum constants in glibcelf are expected to have exactly these ++# prefixes. ++expected_constant_prefixes = tuple( ++ 'ELFCLASS ELFDATA EM_ ET_ DT_ PF_ PT_ SHF_ SHN_ SHT_ STB_ STT_'.split()) ++ ++def find_constant_prefix(name): ++ """Returns a matching prefix from expected_constant_prefixes or None.""" ++ for prefix in expected_constant_prefixes: ++ if name.startswith(prefix): ++ return prefix ++ return None ++ ++def find_enum_types(): ++ """A generator for OpenIntEnum and IntFlag classes in glibcelf.""" ++ for obj in vars(glibcelf).values(): ++ if isinstance(obj, type) and obj.__bases__[0] in ( ++ glibcelf._OpenIntEnum, enum.Enum, enum.IntFlag): ++ yield obj ++ ++def check_duplicates(): ++ """Verifies that enum types do not have duplicate values. ++ ++ Different types must have different member names, too. ++ ++ """ ++ global_seen = {} ++ for typ in find_enum_types(): ++ seen = {} ++ last = None ++ for (name, e) in typ.__members__.items(): ++ if e.value in seen: ++ error('{} has {}={} and {}={}'.format( ++ typ, seen[e.value], e.value, name, e.value)) ++ last = e ++ else: ++ seen[e.value] = name ++ if last is not None and last.value > e.value: ++ error('{} has {}={} after {}={}'.format( ++ typ, name, e.value, last.name, last.value)) ++ if name in global_seen: ++ error('{} used in {} and {}'.format( ++ name, global_seen[name], typ)) ++ else: ++ global_seen[name] = typ ++ ++def check_constant_prefixes(): ++ """Check that the constant prefixes match expected_constant_prefixes.""" ++ seen = set() ++ for typ in find_enum_types(): ++ typ_prefix = None ++ for val in typ: ++ prefix = find_constant_prefix(val.name) ++ if prefix is None: ++ error('constant {!r} for {} has unknown prefix'.format( ++ val, typ)) ++ break ++ elif typ_prefix is None: ++ typ_prefix = prefix ++ seen.add(typ_prefix) ++ elif prefix != typ_prefix: ++ error('prefix {!r} for constant {!r}, expected {!r}'.format( ++ prefix, val, typ_prefix)) ++ if typ_prefix is None: ++ error('empty enum type {}'.format(typ)) ++ ++ for prefix in sorted(set(expected_constant_prefixes) - seen): ++ error('missing constant prefix {!r}'.format(prefix)) ++ # Reverse difference is already covered inside the loop. ++ ++def find_elf_h_constants(cc): ++ """Returns a dictionary of relevant constants from .""" ++ return glibcextract.compute_macro_consts( ++ source_text='#include ', ++ cc=cc, ++ macro_re='|'.join( ++ prefix + '.*' for prefix in expected_constant_prefixes)) ++ ++# The first part of the pair is a name of an constant that is ++# dropped from glibcelf. The second part is the constant as it is ++# used in . ++glibcelf_skipped_aliases = ( ++ ('EM_ARC_A5', 'EM_ARC_COMPACT'), ++ ('PF_PARISC_SBP', 'PF_HP_SBP') ++) ++ ++# Constants that provide little value and are not included in ++# glibcelf: *LO*/*HI* range constants, *NUM constants counting the ++# number of constants. Also includes the alias names from ++# glibcelf_skipped_aliases. ++glibcelf_skipped_constants = frozenset( ++ [e[0] for e in glibcelf_skipped_aliases]) | frozenset(""" ++DT_AARCH64_NUM ++DT_ADDRNUM ++DT_ADDRRNGHI ++DT_ADDRRNGLO ++DT_ALPHA_NUM ++DT_ENCODING ++DT_EXTRANUM ++DT_HIOS ++DT_HIPROC ++DT_IA_64_NUM ++DT_LOOS ++DT_LOPROC ++DT_MIPS_NUM ++DT_NUM ++DT_PPC64_NUM ++DT_PPC_NUM ++DT_PROCNUM ++DT_SPARC_NUM ++DT_VALNUM ++DT_VALRNGHI ++DT_VALRNGLO ++DT_VERSIONTAGNUM ++ELFCLASSNUM ++ELFDATANUM ++ET_HIOS ++ET_HIPROC ++ET_LOOS ++ET_LOPROC ++ET_NUM ++PF_MASKOS ++PF_MASKPROC ++PT_HIOS ++PT_HIPROC ++PT_HISUNW ++PT_LOOS ++PT_LOPROC ++PT_LOSUNW ++SHF_MASKOS ++SHF_MASKPROC ++SHN_HIOS ++SHN_HIPROC ++SHN_HIRESERVE ++SHN_LOOS ++SHN_LOPROC ++SHN_LORESERVE ++SHT_HIOS ++SHT_HIPROC ++SHT_HIPROC ++SHT_HISUNW ++SHT_HIUSER ++SHT_LOOS ++SHT_LOPROC ++SHT_LOSUNW ++SHT_LOUSER ++SHT_NUM ++STB_HIOS ++STB_HIPROC ++STB_LOOS ++STB_LOPROC ++STB_NUM ++STT_HIOS ++STT_HIPROC ++STT_LOOS ++STT_LOPROC ++STT_NUM ++""".strip().split()) ++ ++def check_constant_values(cc): ++ """Checks the values of constants against glibcelf.""" ++ ++ glibcelf_constants = { ++ e.name: e for typ in find_enum_types() for e in typ} ++ elf_h_constants = find_elf_h_constants(cc=cc) ++ ++ missing_in_glibcelf = (set(elf_h_constants) - set(glibcelf_constants) ++ - glibcelf_skipped_constants) ++ for name in sorted(missing_in_glibcelf): ++ error('constant {} is missing from glibcelf'.format(name)) ++ ++ unexpected_in_glibcelf = \ ++ set(glibcelf_constants) & glibcelf_skipped_constants ++ for name in sorted(unexpected_in_glibcelf): ++ error('constant {} is supposed to be filtered from glibcelf'.format( ++ name)) ++ ++ missing_in_elf_h = set(glibcelf_constants) - set(elf_h_constants) ++ for name in sorted(missing_in_elf_h): ++ error('constant {} is missing from '.format(name)) ++ ++ expected_in_elf_h = glibcelf_skipped_constants - set(elf_h_constants) ++ for name in expected_in_elf_h: ++ error('filtered constant {} is missing from '.format(name)) ++ ++ for alias_name, name_in_glibcelf in glibcelf_skipped_aliases: ++ if name_in_glibcelf not in glibcelf_constants: ++ error('alias value {} for {} not in glibcelf'.format( ++ name_in_glibcelf, alias_name)) ++ elif (int(elf_h_constants[alias_name]) ++ != glibcelf_constants[name_in_glibcelf].value): ++ error(' has {}={}, glibcelf has {}={}'.format( ++ alias_name, elf_h_constants[alias_name], ++ name_in_glibcelf, glibcelf_constants[name_in_glibcelf])) ++ ++ # Check for value mismatches: ++ for name in sorted(set(glibcelf_constants) & set(elf_h_constants)): ++ glibcelf_value = glibcelf_constants[name].value ++ elf_h_value = int(elf_h_constants[name]) ++ # On 32-bit architectures as some constants that are ++ # parsed as signed, while they are unsigned in glibcelf. So ++ # far, this only affects some flag constants, so special-case ++ # them here. ++ if (glibcelf_value != elf_h_value ++ and not (isinstance(glibcelf_constants[name], enum.IntFlag) ++ and glibcelf_value == 1 << 31 ++ and elf_h_value == -(1 << 31))): ++ error('{}: glibcelf has {!r}, has {!r}'.format( ++ name, glibcelf_value, elf_h_value)) ++ ++def main(): ++ """The main entry point.""" ++ parser = argparse.ArgumentParser( ++ description="Check glibcelf.py and elf.h against each other.") ++ parser.add_argument('--cc', metavar='CC', ++ help='C compiler (including options) to use') ++ args = parser.parse_args() ++ ++ check_duplicates() ++ check_constant_prefixes() ++ check_constant_values(cc=args.cc) ++ ++ if errors_encountered > 0: ++ print("note: errors encountered:", errors_encountered) ++ sys.exit(1) ++ ++if __name__ == '__main__': ++ main() +diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py +new file mode 100644 +index 0000000000000000..8f7d0ca184845714 +--- /dev/null ++++ b/scripts/glibcelf.py +@@ -0,0 +1,1135 @@ ++#!/usr/bin/python3 ++# ELF support functionality for Python. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++"""Basic ELF parser. ++ ++Use Image.readfile(path) to read an ELF file into memory and begin ++parsing it. ++ ++""" ++ ++import collections ++import enum ++import struct ++ ++class _OpenIntEnum(enum.IntEnum): ++ """Integer enumeration that supports arbitrary int values.""" ++ @classmethod ++ def _missing_(cls, value): ++ # See enum.IntFlag._create_pseudo_member_. This allows ++ # creating of enum constants with arbitrary integer values. ++ pseudo_member = int.__new__(cls, value) ++ pseudo_member._name_ = None ++ pseudo_member._value_ = value ++ return pseudo_member ++ ++ def __repr__(self): ++ name = self._name_ ++ if name is not None: ++ # The names have prefixes like SHT_, implying their type. ++ return name ++ return '{}({})'.format(self.__class__.__name__, self._value_) ++ ++ def __str__(self): ++ name = self._name_ ++ if name is not None: ++ return name ++ return str(self._value_) ++ ++class ElfClass(_OpenIntEnum): ++ """ELF word size. Type of EI_CLASS values.""" ++ ELFCLASSNONE = 0 ++ ELFCLASS32 = 1 ++ ELFCLASS64 = 2 ++ ++class ElfData(_OpenIntEnum): ++ """ELF endianess. Type of EI_DATA values.""" ++ ELFDATANONE = 0 ++ ELFDATA2LSB = 1 ++ ELFDATA2MSB = 2 ++ ++class Machine(_OpenIntEnum): ++ """ELF machine type. Type of values in Ehdr.e_machine field.""" ++ EM_NONE = 0 ++ EM_M32 = 1 ++ EM_SPARC = 2 ++ EM_386 = 3 ++ EM_68K = 4 ++ EM_88K = 5 ++ EM_IAMCU = 6 ++ EM_860 = 7 ++ EM_MIPS = 8 ++ EM_S370 = 9 ++ EM_MIPS_RS3_LE = 10 ++ EM_PARISC = 15 ++ EM_VPP500 = 17 ++ EM_SPARC32PLUS = 18 ++ EM_960 = 19 ++ EM_PPC = 20 ++ EM_PPC64 = 21 ++ EM_S390 = 22 ++ EM_SPU = 23 ++ EM_V800 = 36 ++ EM_FR20 = 37 ++ EM_RH32 = 38 ++ EM_RCE = 39 ++ EM_ARM = 40 ++ EM_FAKE_ALPHA = 41 ++ EM_SH = 42 ++ EM_SPARCV9 = 43 ++ EM_TRICORE = 44 ++ EM_ARC = 45 ++ EM_H8_300 = 46 ++ EM_H8_300H = 47 ++ EM_H8S = 48 ++ EM_H8_500 = 49 ++ EM_IA_64 = 50 ++ EM_MIPS_X = 51 ++ EM_COLDFIRE = 52 ++ EM_68HC12 = 53 ++ EM_MMA = 54 ++ EM_PCP = 55 ++ EM_NCPU = 56 ++ EM_NDR1 = 57 ++ EM_STARCORE = 58 ++ EM_ME16 = 59 ++ EM_ST100 = 60 ++ EM_TINYJ = 61 ++ EM_X86_64 = 62 ++ EM_PDSP = 63 ++ EM_PDP10 = 64 ++ EM_PDP11 = 65 ++ EM_FX66 = 66 ++ EM_ST9PLUS = 67 ++ EM_ST7 = 68 ++ EM_68HC16 = 69 ++ EM_68HC11 = 70 ++ EM_68HC08 = 71 ++ EM_68HC05 = 72 ++ EM_SVX = 73 ++ EM_ST19 = 74 ++ EM_VAX = 75 ++ EM_CRIS = 76 ++ EM_JAVELIN = 77 ++ EM_FIREPATH = 78 ++ EM_ZSP = 79 ++ EM_MMIX = 80 ++ EM_HUANY = 81 ++ EM_PRISM = 82 ++ EM_AVR = 83 ++ EM_FR30 = 84 ++ EM_D10V = 85 ++ EM_D30V = 86 ++ EM_V850 = 87 ++ EM_M32R = 88 ++ EM_MN10300 = 89 ++ EM_MN10200 = 90 ++ EM_PJ = 91 ++ EM_OPENRISC = 92 ++ EM_ARC_COMPACT = 93 ++ EM_XTENSA = 94 ++ EM_VIDEOCORE = 95 ++ EM_TMM_GPP = 96 ++ EM_NS32K = 97 ++ EM_TPC = 98 ++ EM_SNP1K = 99 ++ EM_ST200 = 100 ++ EM_IP2K = 101 ++ EM_MAX = 102 ++ EM_CR = 103 ++ EM_F2MC16 = 104 ++ EM_MSP430 = 105 ++ EM_BLACKFIN = 106 ++ EM_SE_C33 = 107 ++ EM_SEP = 108 ++ EM_ARCA = 109 ++ EM_UNICORE = 110 ++ EM_EXCESS = 111 ++ EM_DXP = 112 ++ EM_ALTERA_NIOS2 = 113 ++ EM_CRX = 114 ++ EM_XGATE = 115 ++ EM_C166 = 116 ++ EM_M16C = 117 ++ EM_DSPIC30F = 118 ++ EM_CE = 119 ++ EM_M32C = 120 ++ EM_TSK3000 = 131 ++ EM_RS08 = 132 ++ EM_SHARC = 133 ++ EM_ECOG2 = 134 ++ EM_SCORE7 = 135 ++ EM_DSP24 = 136 ++ EM_VIDEOCORE3 = 137 ++ EM_LATTICEMICO32 = 138 ++ EM_SE_C17 = 139 ++ EM_TI_C6000 = 140 ++ EM_TI_C2000 = 141 ++ EM_TI_C5500 = 142 ++ EM_TI_ARP32 = 143 ++ EM_TI_PRU = 144 ++ EM_MMDSP_PLUS = 160 ++ EM_CYPRESS_M8C = 161 ++ EM_R32C = 162 ++ EM_TRIMEDIA = 163 ++ EM_QDSP6 = 164 ++ EM_8051 = 165 ++ EM_STXP7X = 166 ++ EM_NDS32 = 167 ++ EM_ECOG1X = 168 ++ EM_MAXQ30 = 169 ++ EM_XIMO16 = 170 ++ EM_MANIK = 171 ++ EM_CRAYNV2 = 172 ++ EM_RX = 173 ++ EM_METAG = 174 ++ EM_MCST_ELBRUS = 175 ++ EM_ECOG16 = 176 ++ EM_CR16 = 177 ++ EM_ETPU = 178 ++ EM_SLE9X = 179 ++ EM_L10M = 180 ++ EM_K10M = 181 ++ EM_AARCH64 = 183 ++ EM_AVR32 = 185 ++ EM_STM8 = 186 ++ EM_TILE64 = 187 ++ EM_TILEPRO = 188 ++ EM_MICROBLAZE = 189 ++ EM_CUDA = 190 ++ EM_TILEGX = 191 ++ EM_CLOUDSHIELD = 192 ++ EM_COREA_1ST = 193 ++ EM_COREA_2ND = 194 ++ EM_ARCV2 = 195 ++ EM_OPEN8 = 196 ++ EM_RL78 = 197 ++ EM_VIDEOCORE5 = 198 ++ EM_78KOR = 199 ++ EM_56800EX = 200 ++ EM_BA1 = 201 ++ EM_BA2 = 202 ++ EM_XCORE = 203 ++ EM_MCHP_PIC = 204 ++ EM_INTELGT = 205 ++ EM_KM32 = 210 ++ EM_KMX32 = 211 ++ EM_EMX16 = 212 ++ EM_EMX8 = 213 ++ EM_KVARC = 214 ++ EM_CDP = 215 ++ EM_COGE = 216 ++ EM_COOL = 217 ++ EM_NORC = 218 ++ EM_CSR_KALIMBA = 219 ++ EM_Z80 = 220 ++ EM_VISIUM = 221 ++ EM_FT32 = 222 ++ EM_MOXIE = 223 ++ EM_AMDGPU = 224 ++ EM_RISCV = 243 ++ EM_BPF = 247 ++ EM_CSKY = 252 ++ EM_NUM = 253 ++ EM_ALPHA = 0x9026 ++ ++class Et(_OpenIntEnum): ++ """ELF file type. Type of ET_* values and the Ehdr.e_type field.""" ++ ET_NONE = 0 ++ ET_REL = 1 ++ ET_EXEC = 2 ++ ET_DYN = 3 ++ ET_CORE = 4 ++ ++class Shn(_OpenIntEnum): ++ """ELF reserved section indices.""" ++ SHN_UNDEF = 0 ++ SHN_BEFORE = 0xff00 ++ SHN_AFTER = 0xff01 ++ SHN_ABS = 0xfff1 ++ SHN_COMMON = 0xfff2 ++ SHN_XINDEX = 0xffff ++ ++class ShnMIPS(enum.Enum): ++ """Supplemental SHN_* constants for EM_MIPS.""" ++ SHN_MIPS_ACOMMON = 0xff00 ++ SHN_MIPS_TEXT = 0xff01 ++ SHN_MIPS_DATA = 0xff02 ++ SHN_MIPS_SCOMMON = 0xff03 ++ SHN_MIPS_SUNDEFINED = 0xff04 ++ ++class ShnPARISC(enum.Enum): ++ """Supplemental SHN_* constants for EM_PARISC.""" ++ SHN_PARISC_ANSI_COMMON = 0xff00 ++ SHN_PARISC_HUGE_COMMON = 0xff01 ++ ++class Sht(_OpenIntEnum): ++ """ELF section types. Type of SHT_* values.""" ++ SHT_NULL = 0 ++ SHT_PROGBITS = 1 ++ SHT_SYMTAB = 2 ++ SHT_STRTAB = 3 ++ SHT_RELA = 4 ++ SHT_HASH = 5 ++ SHT_DYNAMIC = 6 ++ SHT_NOTE = 7 ++ SHT_NOBITS = 8 ++ SHT_REL = 9 ++ SHT_SHLIB = 10 ++ SHT_DYNSYM = 11 ++ SHT_INIT_ARRAY = 14 ++ SHT_FINI_ARRAY = 15 ++ SHT_PREINIT_ARRAY = 16 ++ SHT_GROUP = 17 ++ SHT_SYMTAB_SHNDX = 18 ++ SHT_GNU_ATTRIBUTES = 0x6ffffff5 ++ SHT_GNU_HASH = 0x6ffffff6 ++ SHT_GNU_LIBLIST = 0x6ffffff7 ++ SHT_CHECKSUM = 0x6ffffff8 ++ SHT_SUNW_move = 0x6ffffffa ++ SHT_SUNW_COMDAT = 0x6ffffffb ++ SHT_SUNW_syminfo = 0x6ffffffc ++ SHT_GNU_verdef = 0x6ffffffd ++ SHT_GNU_verneed = 0x6ffffffe ++ SHT_GNU_versym = 0x6fffffff ++ ++class ShtALPHA(enum.Enum): ++ """Supplemental SHT_* constants for EM_ALPHA.""" ++ SHT_ALPHA_DEBUG = 0x70000001 ++ SHT_ALPHA_REGINFO = 0x70000002 ++ ++class ShtARM(enum.Enum): ++ """Supplemental SHT_* constants for EM_ARM.""" ++ SHT_ARM_EXIDX = 0x70000001 ++ SHT_ARM_PREEMPTMAP = 0x70000002 ++ SHT_ARM_ATTRIBUTES = 0x70000003 ++ ++class ShtCSKY(enum.Enum): ++ """Supplemental SHT_* constants for EM_CSKY.""" ++ SHT_CSKY_ATTRIBUTES = 0x70000001 ++ ++class ShtIA_64(enum.Enum): ++ """Supplemental SHT_* constants for EM_IA_64.""" ++ SHT_IA_64_EXT = 0x70000000 ++ SHT_IA_64_UNWIND = 0x70000001 ++ ++class ShtMIPS(enum.Enum): ++ """Supplemental SHT_* constants for EM_MIPS.""" ++ SHT_MIPS_LIBLIST = 0x70000000 ++ SHT_MIPS_MSYM = 0x70000001 ++ SHT_MIPS_CONFLICT = 0x70000002 ++ SHT_MIPS_GPTAB = 0x70000003 ++ SHT_MIPS_UCODE = 0x70000004 ++ SHT_MIPS_DEBUG = 0x70000005 ++ SHT_MIPS_REGINFO = 0x70000006 ++ SHT_MIPS_PACKAGE = 0x70000007 ++ SHT_MIPS_PACKSYM = 0x70000008 ++ SHT_MIPS_RELD = 0x70000009 ++ SHT_MIPS_IFACE = 0x7000000b ++ SHT_MIPS_CONTENT = 0x7000000c ++ SHT_MIPS_OPTIONS = 0x7000000d ++ SHT_MIPS_SHDR = 0x70000010 ++ SHT_MIPS_FDESC = 0x70000011 ++ SHT_MIPS_EXTSYM = 0x70000012 ++ SHT_MIPS_DENSE = 0x70000013 ++ SHT_MIPS_PDESC = 0x70000014 ++ SHT_MIPS_LOCSYM = 0x70000015 ++ SHT_MIPS_AUXSYM = 0x70000016 ++ SHT_MIPS_OPTSYM = 0x70000017 ++ SHT_MIPS_LOCSTR = 0x70000018 ++ SHT_MIPS_LINE = 0x70000019 ++ SHT_MIPS_RFDESC = 0x7000001a ++ SHT_MIPS_DELTASYM = 0x7000001b ++ SHT_MIPS_DELTAINST = 0x7000001c ++ SHT_MIPS_DELTACLASS = 0x7000001d ++ SHT_MIPS_DWARF = 0x7000001e ++ SHT_MIPS_DELTADECL = 0x7000001f ++ SHT_MIPS_SYMBOL_LIB = 0x70000020 ++ SHT_MIPS_EVENTS = 0x70000021 ++ SHT_MIPS_TRANSLATE = 0x70000022 ++ SHT_MIPS_PIXIE = 0x70000023 ++ SHT_MIPS_XLATE = 0x70000024 ++ SHT_MIPS_XLATE_DEBUG = 0x70000025 ++ SHT_MIPS_WHIRL = 0x70000026 ++ SHT_MIPS_EH_REGION = 0x70000027 ++ SHT_MIPS_XLATE_OLD = 0x70000028 ++ SHT_MIPS_PDR_EXCEPTION = 0x70000029 ++ SHT_MIPS_XHASH = 0x7000002b ++ ++class ShtPARISC(enum.Enum): ++ """Supplemental SHT_* constants for EM_PARISC.""" ++ SHT_PARISC_EXT = 0x70000000 ++ SHT_PARISC_UNWIND = 0x70000001 ++ SHT_PARISC_DOC = 0x70000002 ++ ++class Pf(enum.IntFlag): ++ """Program header flags. Type of Phdr.p_flags values.""" ++ PF_X = 1 ++ PF_W = 2 ++ PF_R = 4 ++ ++class PfARM(enum.IntFlag): ++ """Supplemental PF_* flags for EM_ARM.""" ++ PF_ARM_SB = 0x10000000 ++ PF_ARM_PI = 0x20000000 ++ PF_ARM_ABS = 0x40000000 ++ ++class PfPARISC(enum.IntFlag): ++ """Supplemental PF_* flags for EM_PARISC.""" ++ PF_HP_PAGE_SIZE = 0x00100000 ++ PF_HP_FAR_SHARED = 0x00200000 ++ PF_HP_NEAR_SHARED = 0x00400000 ++ PF_HP_CODE = 0x01000000 ++ PF_HP_MODIFY = 0x02000000 ++ PF_HP_LAZYSWAP = 0x04000000 ++ PF_HP_SBP = 0x08000000 ++ ++class PfIA_64(enum.IntFlag): ++ """Supplemental PF_* flags for EM_IA_64.""" ++ PF_IA_64_NORECOV = 0x80000000 ++ ++class PfMIPS(enum.IntFlag): ++ """Supplemental PF_* flags for EM_MIPS.""" ++ PF_MIPS_LOCAL = 0x10000000 ++ ++class Shf(enum.IntFlag): ++ """Section flags. Type of Shdr.sh_type values.""" ++ SHF_WRITE = 1 << 0 ++ SHF_ALLOC = 1 << 1 ++ SHF_EXECINSTR = 1 << 2 ++ SHF_MERGE = 1 << 4 ++ SHF_STRINGS = 1 << 5 ++ SHF_INFO_LINK = 1 << 6 ++ SHF_LINK_ORDER = 1 << 7 ++ SHF_OS_NONCONFORMING = 256 ++ SHF_GROUP = 1 << 9 ++ SHF_TLS = 1 << 10 ++ SHF_COMPRESSED = 1 << 11 ++ SHF_GNU_RETAIN = 1 << 21 ++ SHF_ORDERED = 1 << 30 ++ SHF_EXCLUDE = 1 << 31 ++ ++class ShfALPHA(enum.IntFlag): ++ """Supplemental SHF_* constants for EM_ALPHA.""" ++ SHF_ALPHA_GPREL = 0x10000000 ++ ++class ShfARM(enum.IntFlag): ++ """Supplemental SHF_* constants for EM_ARM.""" ++ SHF_ARM_ENTRYSECT = 0x10000000 ++ SHF_ARM_COMDEF = 0x80000000 ++ ++class ShfIA_64(enum.IntFlag): ++ """Supplemental SHF_* constants for EM_IA_64.""" ++ SHF_IA_64_SHORT = 0x10000000 ++ SHF_IA_64_NORECOV = 0x20000000 ++ ++class ShfMIPS(enum.IntFlag): ++ """Supplemental SHF_* constants for EM_MIPS.""" ++ SHF_MIPS_GPREL = 0x10000000 ++ SHF_MIPS_MERGE = 0x20000000 ++ SHF_MIPS_ADDR = 0x40000000 ++ SHF_MIPS_STRINGS = 0x80000000 ++ SHF_MIPS_NOSTRIP = 0x08000000 ++ SHF_MIPS_LOCAL = 0x04000000 ++ SHF_MIPS_NAMES = 0x02000000 ++ SHF_MIPS_NODUPE = 0x01000000 ++ ++class ShfPARISC(enum.IntFlag): ++ """Supplemental SHF_* constants for EM_PARISC.""" ++ SHF_PARISC_SHORT = 0x20000000 ++ SHF_PARISC_HUGE = 0x40000000 ++ SHF_PARISC_SBP = 0x80000000 ++ ++class Stb(_OpenIntEnum): ++ """ELF symbol binding type.""" ++ STB_LOCAL = 0 ++ STB_GLOBAL = 1 ++ STB_WEAK = 2 ++ STB_GNU_UNIQUE = 10 ++ STB_MIPS_SPLIT_COMMON = 13 ++ ++class Stt(_OpenIntEnum): ++ """ELF symbol type.""" ++ STT_NOTYPE = 0 ++ STT_OBJECT = 1 ++ STT_FUNC = 2 ++ STT_SECTION = 3 ++ STT_FILE = 4 ++ STT_COMMON = 5 ++ STT_TLS = 6 ++ STT_GNU_IFUNC = 10 ++ ++class SttARM(enum.Enum): ++ """Supplemental STT_* constants for EM_ARM.""" ++ STT_ARM_TFUNC = 13 ++ STT_ARM_16BIT = 15 ++ ++class SttPARISC(enum.Enum): ++ """Supplemental STT_* constants for EM_PARISC.""" ++ STT_HP_OPAQUE = 11 ++ STT_HP_STUB = 12 ++ STT_PARISC_MILLICODE = 13 ++ ++class SttSPARC(enum.Enum): ++ """Supplemental STT_* constants for EM_SPARC.""" ++ STT_SPARC_REGISTER = 13 ++ ++class SttX86_64(enum.Enum): ++ """Supplemental STT_* constants for EM_X86_64.""" ++ SHT_X86_64_UNWIND = 0x70000001 ++ ++class Pt(_OpenIntEnum): ++ """ELF program header types. Type of Phdr.p_type.""" ++ PT_NULL = 0 ++ PT_LOAD = 1 ++ PT_DYNAMIC = 2 ++ PT_INTERP = 3 ++ PT_NOTE = 4 ++ PT_SHLIB = 5 ++ PT_PHDR = 6 ++ PT_TLS = 7 ++ PT_NUM = 8 ++ PT_GNU_EH_FRAME = 0x6474e550 ++ PT_GNU_STACK = 0x6474e551 ++ PT_GNU_RELRO = 0x6474e552 ++ PT_GNU_PROPERTY = 0x6474e553 ++ PT_SUNWBSS = 0x6ffffffa ++ PT_SUNWSTACK = 0x6ffffffb ++ ++class PtARM(enum.Enum): ++ """Supplemental PT_* constants for EM_ARM.""" ++ PT_ARM_EXIDX = 0x70000001 ++ ++class PtIA_64(enum.Enum): ++ """Supplemental PT_* constants for EM_IA_64.""" ++ PT_IA_64_HP_OPT_ANOT = 0x60000012 ++ PT_IA_64_HP_HSL_ANOT = 0x60000013 ++ PT_IA_64_HP_STACK = 0x60000014 ++ PT_IA_64_ARCHEXT = 0x70000000 ++ PT_IA_64_UNWIND = 0x70000001 ++ ++class PtMIPS(enum.Enum): ++ """Supplemental PT_* constants for EM_MIPS.""" ++ PT_MIPS_REGINFO = 0x70000000 ++ PT_MIPS_RTPROC = 0x70000001 ++ PT_MIPS_OPTIONS = 0x70000002 ++ PT_MIPS_ABIFLAGS = 0x70000003 ++ ++class PtPARISC(enum.Enum): ++ """Supplemental PT_* constants for EM_PARISC.""" ++ PT_HP_TLS = 0x60000000 ++ PT_HP_CORE_NONE = 0x60000001 ++ PT_HP_CORE_VERSION = 0x60000002 ++ PT_HP_CORE_KERNEL = 0x60000003 ++ PT_HP_CORE_COMM = 0x60000004 ++ PT_HP_CORE_PROC = 0x60000005 ++ PT_HP_CORE_LOADABLE = 0x60000006 ++ PT_HP_CORE_STACK = 0x60000007 ++ PT_HP_CORE_SHM = 0x60000008 ++ PT_HP_CORE_MMF = 0x60000009 ++ PT_HP_PARALLEL = 0x60000010 ++ PT_HP_FASTBIND = 0x60000011 ++ PT_HP_OPT_ANNOT = 0x60000012 ++ PT_HP_HSL_ANNOT = 0x60000013 ++ PT_HP_STACK = 0x60000014 ++ PT_PARISC_ARCHEXT = 0x70000000 ++ PT_PARISC_UNWIND = 0x70000001 ++ ++class Dt(_OpenIntEnum): ++ """ELF dynamic segment tags. Type of Dyn.d_val.""" ++ DT_NULL = 0 ++ DT_NEEDED = 1 ++ DT_PLTRELSZ = 2 ++ DT_PLTGOT = 3 ++ DT_HASH = 4 ++ DT_STRTAB = 5 ++ DT_SYMTAB = 6 ++ DT_RELA = 7 ++ DT_RELASZ = 8 ++ DT_RELAENT = 9 ++ DT_STRSZ = 10 ++ DT_SYMENT = 11 ++ DT_INIT = 12 ++ DT_FINI = 13 ++ DT_SONAME = 14 ++ DT_RPATH = 15 ++ DT_SYMBOLIC = 16 ++ DT_REL = 17 ++ DT_RELSZ = 18 ++ DT_RELENT = 19 ++ DT_PLTREL = 20 ++ DT_DEBUG = 21 ++ DT_TEXTREL = 22 ++ DT_JMPREL = 23 ++ DT_BIND_NOW = 24 ++ DT_INIT_ARRAY = 25 ++ DT_FINI_ARRAY = 26 ++ DT_INIT_ARRAYSZ = 27 ++ DT_FINI_ARRAYSZ = 28 ++ DT_RUNPATH = 29 ++ DT_FLAGS = 30 ++ DT_PREINIT_ARRAY = 32 ++ DT_PREINIT_ARRAYSZ = 33 ++ DT_SYMTAB_SHNDX = 34 ++ DT_GNU_PRELINKED = 0x6ffffdf5 ++ DT_GNU_CONFLICTSZ = 0x6ffffdf6 ++ DT_GNU_LIBLISTSZ = 0x6ffffdf7 ++ DT_CHECKSUM = 0x6ffffdf8 ++ DT_PLTPADSZ = 0x6ffffdf9 ++ DT_MOVEENT = 0x6ffffdfa ++ DT_MOVESZ = 0x6ffffdfb ++ DT_FEATURE_1 = 0x6ffffdfc ++ DT_POSFLAG_1 = 0x6ffffdfd ++ DT_SYMINSZ = 0x6ffffdfe ++ DT_SYMINENT = 0x6ffffdff ++ DT_GNU_HASH = 0x6ffffef5 ++ DT_TLSDESC_PLT = 0x6ffffef6 ++ DT_TLSDESC_GOT = 0x6ffffef7 ++ DT_GNU_CONFLICT = 0x6ffffef8 ++ DT_GNU_LIBLIST = 0x6ffffef9 ++ DT_CONFIG = 0x6ffffefa ++ DT_DEPAUDIT = 0x6ffffefb ++ DT_AUDIT = 0x6ffffefc ++ DT_PLTPAD = 0x6ffffefd ++ DT_MOVETAB = 0x6ffffefe ++ DT_SYMINFO = 0x6ffffeff ++ DT_VERSYM = 0x6ffffff0 ++ DT_RELACOUNT = 0x6ffffff9 ++ DT_RELCOUNT = 0x6ffffffa ++ DT_FLAGS_1 = 0x6ffffffb ++ DT_VERDEF = 0x6ffffffc ++ DT_VERDEFNUM = 0x6ffffffd ++ DT_VERNEED = 0x6ffffffe ++ DT_VERNEEDNUM = 0x6fffffff ++ DT_AUXILIARY = 0x7ffffffd ++ DT_FILTER = 0x7fffffff ++ ++class DtAARCH64(enum.Enum): ++ """Supplemental DT_* constants for EM_AARCH64.""" ++ DT_AARCH64_BTI_PLT = 0x70000001 ++ DT_AARCH64_PAC_PLT = 0x70000003 ++ DT_AARCH64_VARIANT_PCS = 0x70000005 ++ ++class DtALPHA(enum.Enum): ++ """Supplemental DT_* constants for EM_ALPHA.""" ++ DT_ALPHA_PLTRO = 0x70000000 ++ ++class DtALTERA_NIOS2(enum.Enum): ++ """Supplemental DT_* constants for EM_ALTERA_NIOS2.""" ++ DT_NIOS2_GP = 0x70000002 ++ ++class DtIA_64(enum.Enum): ++ """Supplemental DT_* constants for EM_IA_64.""" ++ DT_IA_64_PLT_RESERVE = 0x70000000 ++ ++class DtMIPS(enum.Enum): ++ """Supplemental DT_* constants for EM_MIPS.""" ++ DT_MIPS_RLD_VERSION = 0x70000001 ++ DT_MIPS_TIME_STAMP = 0x70000002 ++ DT_MIPS_ICHECKSUM = 0x70000003 ++ DT_MIPS_IVERSION = 0x70000004 ++ DT_MIPS_FLAGS = 0x70000005 ++ DT_MIPS_BASE_ADDRESS = 0x70000006 ++ DT_MIPS_MSYM = 0x70000007 ++ DT_MIPS_CONFLICT = 0x70000008 ++ DT_MIPS_LIBLIST = 0x70000009 ++ DT_MIPS_LOCAL_GOTNO = 0x7000000a ++ DT_MIPS_CONFLICTNO = 0x7000000b ++ DT_MIPS_LIBLISTNO = 0x70000010 ++ DT_MIPS_SYMTABNO = 0x70000011 ++ DT_MIPS_UNREFEXTNO = 0x70000012 ++ DT_MIPS_GOTSYM = 0x70000013 ++ DT_MIPS_HIPAGENO = 0x70000014 ++ DT_MIPS_RLD_MAP = 0x70000016 ++ DT_MIPS_DELTA_CLASS = 0x70000017 ++ DT_MIPS_DELTA_CLASS_NO = 0x70000018 ++ DT_MIPS_DELTA_INSTANCE = 0x70000019 ++ DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a ++ DT_MIPS_DELTA_RELOC = 0x7000001b ++ DT_MIPS_DELTA_RELOC_NO = 0x7000001c ++ DT_MIPS_DELTA_SYM = 0x7000001d ++ DT_MIPS_DELTA_SYM_NO = 0x7000001e ++ DT_MIPS_DELTA_CLASSSYM = 0x70000020 ++ DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021 ++ DT_MIPS_CXX_FLAGS = 0x70000022 ++ DT_MIPS_PIXIE_INIT = 0x70000023 ++ DT_MIPS_SYMBOL_LIB = 0x70000024 ++ DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025 ++ DT_MIPS_LOCAL_GOTIDX = 0x70000026 ++ DT_MIPS_HIDDEN_GOTIDX = 0x70000027 ++ DT_MIPS_PROTECTED_GOTIDX = 0x70000028 ++ DT_MIPS_OPTIONS = 0x70000029 ++ DT_MIPS_INTERFACE = 0x7000002a ++ DT_MIPS_DYNSTR_ALIGN = 0x7000002b ++ DT_MIPS_INTERFACE_SIZE = 0x7000002c ++ DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d ++ DT_MIPS_PERF_SUFFIX = 0x7000002e ++ DT_MIPS_COMPACT_SIZE = 0x7000002f ++ DT_MIPS_GP_VALUE = 0x70000030 ++ DT_MIPS_AUX_DYNAMIC = 0x70000031 ++ DT_MIPS_PLTGOT = 0x70000032 ++ DT_MIPS_RWPLT = 0x70000034 ++ DT_MIPS_RLD_MAP_REL = 0x70000035 ++ DT_MIPS_XHASH = 0x70000036 ++ ++class DtPPC(enum.Enum): ++ """Supplemental DT_* constants for EM_PPC.""" ++ DT_PPC_GOT = 0x70000000 ++ DT_PPC_OPT = 0x70000001 ++ ++class DtPPC64(enum.Enum): ++ """Supplemental DT_* constants for EM_PPC64.""" ++ DT_PPC64_GLINK = 0x70000000 ++ DT_PPC64_OPD = 0x70000001 ++ DT_PPC64_OPDSZ = 0x70000002 ++ DT_PPC64_OPT = 0x70000003 ++ ++class DtSPARC(enum.Enum): ++ """Supplemental DT_* constants for EM_SPARC.""" ++ DT_SPARC_REGISTER = 0x70000001 ++ ++class StInfo: ++ """ELF symbol binding and type. Type of the Sym.st_info field.""" ++ def __init__(self, arg0, arg1=None): ++ if isinstance(arg0, int) and arg1 is None: ++ self.bind = Stb(arg0 >> 4) ++ self.type = Stt(arg0 & 15) ++ else: ++ self.bind = Stb(arg0) ++ self.type = Stt(arg1) ++ ++ def value(self): ++ """Returns the raw value for the bind/type combination.""" ++ return (self.bind.value() << 4) | (self.type.value()) ++ ++# Type in an ELF file. Used for deserialization. ++_Layout = collections.namedtuple('_Layout', 'unpack size') ++ ++def _define_layouts(baseclass: type, layout32: str, layout64: str, ++ types=None, fields32=None): ++ """Assign variants dict to baseclass. ++ ++ The variants dict is indexed by (ElfClass, ElfData) pairs, and its ++ values are _Layout instances. ++ ++ """ ++ struct32 = struct.Struct(layout32) ++ struct64 = struct.Struct(layout64) ++ ++ # Check that the struct formats yield the right number of components. ++ for s in (struct32, struct64): ++ example = s.unpack(b' ' * s.size) ++ if len(example) != len(baseclass._fields): ++ raise ValueError('{!r} yields wrong field count: {} != {}'.format( ++ s.format, len(example), len(baseclass._fields))) ++ ++ # Check that field names in types are correct. ++ if types is None: ++ types = () ++ for n in types: ++ if n not in baseclass._fields: ++ raise ValueError('{} does not have field {!r}'.format( ++ baseclass.__name__, n)) ++ ++ if fields32 is not None \ ++ and set(fields32) != set(baseclass._fields): ++ raise ValueError('{!r} is not a permutation of the fields {!r}'.format( ++ fields32, baseclass._fields)) ++ ++ def unique_name(name, used_names = (set((baseclass.__name__,)) ++ | set(baseclass._fields) ++ | {n.__name__ ++ for n in (types or {}).values()})): ++ """Find a name that is not used for a class or field name.""" ++ candidate = name ++ n = 0 ++ while candidate in used_names: ++ n += 1 ++ candidate = '{}{}'.format(name, n) ++ used_names.add(candidate) ++ return candidate ++ ++ blob_name = unique_name('blob') ++ struct_unpack_name = unique_name('struct_unpack') ++ comps_name = unique_name('comps') ++ ++ layouts = {} ++ for (bits, elfclass, layout, fields) in ( ++ (32, ElfClass.ELFCLASS32, layout32, fields32), ++ (64, ElfClass.ELFCLASS64, layout64, None), ++ ): ++ for (elfdata, structprefix, funcsuffix) in ( ++ (ElfData.ELFDATA2LSB, '<', 'LE'), ++ (ElfData.ELFDATA2MSB, '>', 'BE'), ++ ): ++ env = { ++ baseclass.__name__: baseclass, ++ struct_unpack_name: struct.unpack, ++ } ++ ++ # Add the type converters. ++ if types: ++ for cls in types.values(): ++ env[cls.__name__] = cls ++ ++ funcname = ''.join( ++ ('unpack_', baseclass.__name__, str(bits), funcsuffix)) ++ ++ code = ''' ++def {funcname}({blob_name}): ++'''.format(funcname=funcname, blob_name=blob_name) ++ ++ indent = ' ' * 4 ++ unpack_call = '{}({!r}, {})'.format( ++ struct_unpack_name, structprefix + layout, blob_name) ++ field_names = ', '.join(baseclass._fields) ++ if types is None and fields is None: ++ code += '{}return {}({})\n'.format( ++ indent, baseclass.__name__, unpack_call) ++ else: ++ # Destructuring tuple assignment. ++ if fields is None: ++ code += '{}{} = {}\n'.format( ++ indent, field_names, unpack_call) ++ else: ++ # Use custom field order. ++ code += '{}{} = {}\n'.format( ++ indent, ', '.join(fields), unpack_call) ++ ++ # Perform the type conversions. ++ for n in baseclass._fields: ++ if n in types: ++ code += '{}{} = {}({})\n'.format( ++ indent, n, types[n].__name__, n) ++ # Create the named tuple. ++ code += '{}return {}({})\n'.format( ++ indent, baseclass.__name__, field_names) ++ ++ exec(code, env) ++ layouts[(elfclass, elfdata)] = _Layout( ++ env[funcname], struct.calcsize(layout)) ++ baseclass.layouts = layouts ++ ++ ++# Corresponds to EI_* indices into Elf*_Ehdr.e_indent. ++class Ident(collections.namedtuple('Ident', ++ 'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')): ++ ++ def __new__(cls, *args): ++ """Construct an object from a blob or its constituent fields.""" ++ if len(args) == 1: ++ return cls.unpack(args[0]) ++ return cls.__base__.__new__(cls, *args) ++ ++ @staticmethod ++ def unpack(blob: memoryview) -> 'Ident': ++ """Parse raws data into a tuple.""" ++ ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \ ++ ei_pad = struct.unpack('4s5B7s', blob) ++ return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data), ++ ei_version, ei_osabi, ei_abiversion, ei_pad) ++ size = 16 ++ ++# Corresponds to Elf32_Ehdr and Elf64_Ehdr. ++Ehdr = collections.namedtuple('Ehdr', ++ 'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags' ++ + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx') ++_define_layouts(Ehdr, ++ layout32='16s2H5I6H', ++ layout64='16s2HI3QI6H', ++ types=dict(e_ident=Ident, ++ e_machine=Machine, ++ e_type=Et, ++ e_shstrndx=Shn)) ++ ++# Corresponds to Elf32_Phdr and Elf64_Pdhr. Order follows the latter. ++Phdr = collections.namedtuple('Phdr', ++ 'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align') ++_define_layouts(Phdr, ++ layout32='8I', ++ fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr', ++ 'p_filesz', 'p_memsz', 'p_flags', 'p_align'), ++ layout64='2I6Q', ++ types=dict(p_type=Pt, p_flags=Pf)) ++ ++ ++# Corresponds to Elf32_Shdr and Elf64_Shdr. ++class Shdr(collections.namedtuple('Shdr', ++ 'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info' ++ + ' sh_addralign sh_entsize')): ++ def resolve(self, strtab: 'StringTable') -> 'Shdr': ++ """Resolve sh_name using a string table.""" ++ return self.__class__(strtab.get(self[0]), *self[1:]) ++_define_layouts(Shdr, ++ layout32='10I', ++ layout64='2I4Q2I2Q', ++ types=dict(sh_type=Sht, ++ sh_flags=Shf, ++ sh_link=Shn)) ++ ++# Corresponds to Elf32_Dyn and Elf64_Dyn. The nesting through the ++# d_un union is skipped, and d_ptr is missing (its representation in ++# Python would be identical to d_val). ++Dyn = collections.namedtuple('Dyn', 'd_tag d_val') ++_define_layouts(Dyn, ++ layout32='2i', ++ layout64='2q', ++ types=dict(d_tag=Dt)) ++ ++# Corresponds to Elf32_Sym and Elf64_Sym. ++class Sym(collections.namedtuple('Sym', ++ 'st_name st_info st_other st_shndx st_value st_size')): ++ def resolve(self, strtab: 'StringTable') -> 'Sym': ++ """Resolve st_name using a string table.""" ++ return self.__class__(strtab.get(self[0]), *self[1:]) ++_define_layouts(Sym, ++ layout32='3I2BH', ++ layout64='I2BH2Q', ++ fields32=('st_name', 'st_value', 'st_size', 'st_info', ++ 'st_other', 'st_shndx'), ++ types=dict(st_shndx=Shn, ++ st_info=StInfo)) ++ ++# Corresponds to Elf32_Rel and Elf64_Rel. ++Rel = collections.namedtuple('Rel', 'r_offset r_info') ++_define_layouts(Rel, ++ layout32='2I', ++ layout64='2Q') ++ ++# Corresponds to Elf32_Rel and Elf64_Rel. ++Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend') ++_define_layouts(Rela, ++ layout32='3I', ++ layout64='3Q') ++ ++class StringTable: ++ """ELF string table.""" ++ def __init__(self, blob): ++ """Create a new string table backed by the data in the blob. ++ ++ blob: a memoryview-like object ++ ++ """ ++ self.blob = blob ++ ++ def get(self, index) -> bytes: ++ """Returns the null-terminated byte string at the index.""" ++ blob = self.blob ++ endindex = index ++ while True: ++ if blob[endindex] == 0: ++ return bytes(blob[index:endindex]) ++ endindex += 1 ++ ++class Image: ++ """ELF image parser.""" ++ def __init__(self, image): ++ """Create an ELF image from binary image data. ++ ++ image: a memoryview-like object that supports efficient range ++ subscripting. ++ ++ """ ++ self.image = image ++ ident = self.read(Ident, 0) ++ classdata = (ident.ei_class, ident.ei_data) ++ # Set self.Ehdr etc. to the subtypes with the right parsers. ++ for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela): ++ setattr(self, typ.__name__, typ.layouts.get(classdata, None)) ++ ++ if self.Ehdr is not None: ++ self.ehdr = self.read(self.Ehdr, 0) ++ self._shdr_num = self._compute_shdr_num() ++ else: ++ self.ehdr = None ++ self._shdr_num = 0 ++ ++ self._section = {} ++ self._stringtab = {} ++ ++ if self._shdr_num > 0: ++ self._shdr_strtab = self._find_shdr_strtab() ++ else: ++ self._shdr_strtab = None ++ ++ @staticmethod ++ def readfile(path: str) -> 'Image': ++ """Reads the ELF file at the specified path.""" ++ with open(path, 'rb') as inp: ++ return Image(memoryview(inp.read())) ++ ++ def _compute_shdr_num(self) -> int: ++ """Computes the actual number of section headers.""" ++ shnum = self.ehdr.e_shnum ++ if shnum == 0: ++ if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0: ++ # No section headers. ++ return 0 ++ # Otherwise the extension mechanism is used (which may be ++ # needed because e_shnum is just 16 bits). ++ return self.read(self.Shdr, self.ehdr.e_shoff).sh_size ++ return shnum ++ ++ def _find_shdr_strtab(self) -> StringTable: ++ """Finds the section header string table (maybe via extensions).""" ++ shstrndx = self.ehdr.e_shstrndx ++ if shstrndx == Shn.SHN_XINDEX: ++ shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link ++ return self._find_stringtab(shstrndx) ++ ++ def read(self, typ: type, offset:int ): ++ """Reads an object at a specific offset. ++ ++ The type must have been enhanced using _define_variants. ++ ++ """ ++ return typ.unpack(self.image[offset: offset + typ.size]) ++ ++ def phdrs(self) -> Phdr: ++ """Generator iterating over the program headers.""" ++ if self.ehdr is None: ++ return ++ size = self.ehdr.e_phentsize ++ if size != self.Phdr.size: ++ raise ValueError('Unexpected Phdr size in ELF header: {} != {}' ++ .format(size, self.Phdr.size)) ++ ++ offset = self.ehdr.e_phoff ++ for _ in range(self.ehdr.e_phnum): ++ yield self.read(self.Phdr, offset) ++ offset += size ++ ++ def shdrs(self, resolve: bool=True) -> Shdr: ++ """Generator iterating over the section headers. ++ ++ If resolve, section names are automatically translated ++ using the section header string table. ++ ++ """ ++ if self._shdr_num == 0: ++ return ++ ++ size = self.ehdr.e_shentsize ++ if size != self.Shdr.size: ++ raise ValueError('Unexpected Shdr size in ELF header: {} != {}' ++ .format(size, self.Shdr.size)) ++ ++ offset = self.ehdr.e_shoff ++ for _ in range(self._shdr_num): ++ shdr = self.read(self.Shdr, offset) ++ if resolve: ++ shdr = shdr.resolve(self._shdr_strtab) ++ yield shdr ++ offset += size ++ ++ def dynamic(self) -> Dyn: ++ """Generator iterating over the dynamic segment.""" ++ for phdr in self.phdrs(): ++ if phdr.p_type == Pt.PT_DYNAMIC: ++ # Pick the first dynamic segment, like the loader. ++ if phdr.p_filesz == 0: ++ # Probably separated debuginfo. ++ return ++ offset = phdr.p_offset ++ end = offset + phdr.p_memsz ++ size = self.Dyn.size ++ while True: ++ next_offset = offset + size ++ if next_offset > end: ++ raise ValueError( ++ 'Dynamic segment size {} is not a multiple of Dyn size {}'.format( ++ phdr.p_memsz, size)) ++ yield self.read(self.Dyn, offset) ++ if next_offset == end: ++ return ++ offset = next_offset ++ ++ def syms(self, shdr: Shdr, resolve: bool=True) -> Sym: ++ """A generator iterating over a symbol table. ++ ++ If resolve, symbol names are automatically translated using ++ the string table for the symbol table. ++ ++ """ ++ assert shdr.sh_type == Sht.SHT_SYMTAB ++ size = shdr.sh_entsize ++ if size != self.Sym.size: ++ raise ValueError('Invalid symbol table entry size {}'.format(size)) ++ offset = shdr.sh_offset ++ end = shdr.sh_offset + shdr.sh_size ++ if resolve: ++ strtab = self._find_stringtab(shdr.sh_link) ++ while offset < end: ++ sym = self.read(self.Sym, offset) ++ if resolve: ++ sym = sym.resolve(strtab) ++ yield sym ++ offset += size ++ if offset != end: ++ raise ValueError('Symbol table is not a multiple of entry size') ++ ++ def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes: ++ """Looks up a string in a string table identified by its link index.""" ++ try: ++ strtab = self._stringtab[strtab_index] ++ except KeyError: ++ strtab = self._find_stringtab(strtab_index) ++ return strtab.get(strtab_offset) ++ ++ def find_section(self, shndx: Shn) -> Shdr: ++ """Returns the section header for the indexed section. ++ ++ The section name is not resolved. ++ """ ++ try: ++ return self._section[shndx] ++ except KeyError: ++ pass ++ if shndx in Shn: ++ raise ValueError('Reserved section index {}'.format(shndx)) ++ idx = shndx.value ++ if idx < 0 or idx > self._shdr_num: ++ raise ValueError('Section index {} out of range [0, {})'.format( ++ idx, self._shdr_num)) ++ shdr = self.read( ++ self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size) ++ self._section[shndx] = shdr ++ return shdr ++ ++ def _find_stringtab(self, sh_link: int) -> StringTable: ++ if sh_link in self._stringtab: ++ return self._stringtab ++ if sh_link < 0 or sh_link >= self._shdr_num: ++ raise ValueError('Section index {} out of range [0, {})'.format( ++ sh_link, self._shdr_num)) ++ shdr = self.read( ++ self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size) ++ if shdr.sh_type != Sht.SHT_STRTAB: ++ raise ValueError( ++ 'Section {} is not a string table: {}'.format( ++ sh_link, shdr.sh_type)) ++ strtab = StringTable( ++ self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size]) ++ # This could retrain essentially arbitrary amounts of data, ++ # but caching string tables seems important for performance. ++ self._stringtab[sh_link] = strtab ++ return strtab ++ ++ ++__all__ = [name for name in dir() if name[0].isupper()] diff --git a/glibc-upstream-2.34-168.patch b/glibc-upstream-2.34-168.patch new file mode 100644 index 0000000..49e07b7 --- /dev/null +++ b/glibc-upstream-2.34-168.patch @@ -0,0 +1,407 @@ +commit f0c71b34f96c816292c49122d50da3a511b67bf2 +Author: Florian Weimer +Date: Mon Apr 11 11:30:31 2022 +0200 + + Default to --with-default-link=no (bug 25812) + + This is necessary to place the libio vtables into the RELRO segment. + New tests elf/tst-relro-ldso and elf/tst-relro-libc are added to + verify that this is what actually happens. + + The new tests fail on ia64 due to lack of (default) RELRO support + inbutils, so they are XFAILed there. + + (cherry picked from commit 198abcbb94618730dae1b3f4393efaa49e0ec8c7) + +diff --git a/INSTALL b/INSTALL +index d8d4e9f155f56616..60d01568d77645c7 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -90,6 +90,12 @@ if 'CFLAGS' is specified it must enable optimization. For example: + library will still be usable, but functionality may be lost--for + example, you can't build a shared libc with old binutils. + ++'--with-default-link=FLAG' ++ With '--with-default-link=yes', the build system does not use a ++ custom linker script for linking shared objects. The default for ++ FLAG is the opposite, 'no', because the custom linker script is ++ needed for full RELRO protection. ++ + '--with-nonshared-cflags=CFLAGS' + Use additional compiler flags CFLAGS to build the parts of the + library which are always statically linked into applications and +diff --git a/configure b/configure +index 03f4e59e754b5463..34c64f8de44e3086 100755 +--- a/configure ++++ b/configure +@@ -3373,7 +3373,7 @@ fi + if test "${with_default_link+set}" = set; then : + withval=$with_default_link; use_default_link=$withval + else +- use_default_link=default ++ use_default_link=no + fi + + +@@ -6085,69 +6085,6 @@ fi + $as_echo "$libc_cv_hashstyle" >&6; } + + +-# The linker's default -shared behavior is good enough if it +-# does these things that our custom linker scripts ensure that +-# all allocated NOTE sections come first. +-if test "$use_default_link" = default; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sufficient default -shared layout" >&5 +-$as_echo_n "checking for sufficient default -shared layout... " >&6; } +-if ${libc_cv_use_default_link+:} false; then : +- $as_echo_n "(cached) " >&6 +-else +- libc_cv_use_default_link=no +- cat > conftest.s <<\EOF +- .section .note.a,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "foo" +- .section .note.b,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "bar" +-EOF +- if { ac_try=' ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&5' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } && +- ac_try=`$READELF -S conftest.so | sed -n \ +- '${x;p;} +- s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/ +- t a +- b +- : a +- H'` +- then +- libc_seen_a=no libc_seen_b=no +- set -- $ac_try +- while test $# -ge 2 -a "$1" = NOTE; do +- case "$2" in +- .note.a) libc_seen_a=yes ;; +- .note.b) libc_seen_b=yes ;; +- esac +- shift 2 +- done +- case "$libc_seen_a$libc_seen_b" in +- yesyes) +- libc_cv_use_default_link=yes +- ;; +- *) +- echo >&5 "\ +-$libc_seen_a$libc_seen_b from: +-$ac_try" +- ;; +- esac +- fi +- rm -f conftest* +-fi +-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_use_default_link" >&5 +-$as_echo "$libc_cv_use_default_link" >&6; } +- use_default_link=$libc_cv_use_default_link +-fi +- + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GLOB_DAT reloc" >&5 + $as_echo_n "checking for GLOB_DAT reloc... " >&6; } + if ${libc_cv_has_glob_dat+:} false; then : +diff --git a/configure.ac b/configure.ac +index eb9431875fae1b0e..2c69af0807266e7e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -153,7 +153,7 @@ AC_ARG_WITH([default-link], + AS_HELP_STRING([--with-default-link], + [do not use explicit linker scripts]), + [use_default_link=$withval], +- [use_default_link=default]) ++ [use_default_link=no]) + + dnl Additional build flags injection. + AC_ARG_WITH([nonshared-cflags], +@@ -1378,59 +1378,6 @@ fi + rm -f conftest*]) + AC_SUBST(libc_cv_hashstyle) + +-# The linker's default -shared behavior is good enough if it +-# does these things that our custom linker scripts ensure that +-# all allocated NOTE sections come first. +-if test "$use_default_link" = default; then +- AC_CACHE_CHECK([for sufficient default -shared layout], +- libc_cv_use_default_link, [dnl +- libc_cv_use_default_link=no +- cat > conftest.s <<\EOF +- .section .note.a,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "foo" +- .section .note.b,"a",%note +- .balign 4 +- .long 4,4,9 +- .string "GNU" +- .string "bar" +-EOF +- if AC_TRY_COMMAND([dnl +- ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD]) && +- ac_try=`$READELF -S conftest.so | sed -n \ +- ['${x;p;} +- s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/ +- t a +- b +- : a +- H']` +- then +- libc_seen_a=no libc_seen_b=no +- set -- $ac_try +- while test $# -ge 2 -a "$1" = NOTE; do +- case "$2" in +- .note.a) libc_seen_a=yes ;; +- .note.b) libc_seen_b=yes ;; +- esac +- shift 2 +- done +- case "$libc_seen_a$libc_seen_b" in +- yesyes) +- libc_cv_use_default_link=yes +- ;; +- *) +- echo >&AS_MESSAGE_LOG_FD "\ +-$libc_seen_a$libc_seen_b from: +-$ac_try" +- ;; +- esac +- fi +- rm -f conftest*]) +- use_default_link=$libc_cv_use_default_link +-fi +- + AC_CACHE_CHECK(for GLOB_DAT reloc, + libc_cv_has_glob_dat, [dnl + cat > conftest.c < $@ 2>&1; $(evaluate-test) ++# The optional symbols are present in libc only if the architecture has ++# the GLIBC_2.0 symbol set in libc. ++$(objpfx)tst-relro-libc.out: tst-relro-symbols.py $(..)/scripts/glibcelf.py \ ++ $(common-objpfx)libc.so ++ $(PYTHON) tst-relro-symbols.py $(common-objpfx)libc.so \ ++ --required=_IO_cookie_jumps \ ++ --required=_IO_file_jumps \ ++ --required=_IO_file_jumps_maybe_mmap \ ++ --required=_IO_file_jumps_mmap \ ++ --required=_IO_helper_jumps \ ++ --required=_IO_mem_jumps \ ++ --required=_IO_obstack_jumps \ ++ --required=_IO_proc_jumps \ ++ --required=_IO_str_chk_jumps \ ++ --required=_IO_str_jumps \ ++ --required=_IO_strn_jumps \ ++ --required=_IO_wfile_jumps \ ++ --required=_IO_wfile_jumps_maybe_mmap \ ++ --required=_IO_wfile_jumps_mmap \ ++ --required=_IO_wmem_jumps \ ++ --required=_IO_wstr_jumps \ ++ --required=_IO_wstrn_jumps \ ++ --optional=_IO_old_cookie_jumps \ ++ --optional=_IO_old_file_jumps \ ++ --optional=_IO_old_proc_jumps \ ++ > $@ 2>&1; $(evaluate-test) ++ + tests += $(tests-execstack-$(have-z-execstack)) + ifeq ($(run-built-tests),yes) + tests-special += \ +diff --git a/elf/tst-relro-symbols.py b/elf/tst-relro-symbols.py +new file mode 100644 +index 0000000000000000..368ea3349f86bd81 +--- /dev/null ++++ b/elf/tst-relro-symbols.py +@@ -0,0 +1,137 @@ ++#!/usr/bin/python3 ++# Verify that certain symbols are covered by RELRO. ++# Copyright (C) 2022 Free Software Foundation, Inc. ++# This file is part of the GNU C Library. ++# ++# The GNU C Library is free software; you can redistribute it and/or ++# modify it under the terms of the GNU Lesser General Public ++# License as published by the Free Software Foundation; either ++# version 2.1 of the License, or (at your option) any later version. ++# ++# The GNU C Library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with the GNU C Library; if not, see ++# . ++ ++"""Analyze a (shared) object to verify that certain symbols are ++present and covered by the PT_GNU_RELRO segment. ++ ++""" ++ ++import argparse ++import os.path ++import sys ++ ++# Make available glibc Python modules. ++sys.path.append(os.path.join( ++ os.path.dirname(os.path.realpath(__file__)), os.path.pardir, 'scripts')) ++ ++import glibcelf ++ ++def find_relro(path: str, img: glibcelf.Image) -> (int, int): ++ """Discover the address range of the PT_GNU_RELRO segment.""" ++ for phdr in img.phdrs(): ++ if phdr.p_type == glibcelf.Pt.PT_GNU_RELRO: ++ # The computation is not entirely accurate because ++ # _dl_protect_relro in elf/dl-reloc.c rounds both the ++ # start end and downwards using the run-time page size. ++ return phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz ++ sys.stdout.write('{}: error: no PT_GNU_RELRO segment\n'.format(path)) ++ sys.exit(1) ++ ++def check_in_relro(kind, relro_begin, relro_end, name, start, size, error): ++ """Check if a section or symbol falls within in the RELRO segment.""" ++ end = start + size - 1 ++ if not (relro_begin <= start < end < relro_end): ++ error( ++ '{} {!r} of size {} at 0x{:x} is not in RELRO range [0x{:x}, 0x{:x})'.format( ++ kind, name.decode('UTF-8'), start, size, ++ relro_begin, relro_end)) ++ ++def get_parser(): ++ """Return an argument parser for this script.""" ++ parser = argparse.ArgumentParser(description=__doc__) ++ parser.add_argument('object', help='path to object file to check') ++ parser.add_argument('--required', metavar='NAME', default=(), ++ help='required symbol names', nargs='*') ++ parser.add_argument('--optional', metavar='NAME', default=(), ++ help='required symbol names', nargs='*') ++ return parser ++ ++def main(argv): ++ """The main entry point.""" ++ parser = get_parser() ++ opts = parser.parse_args(argv) ++ img = glibcelf.Image.readfile(opts.object) ++ ++ required_symbols = frozenset([sym.encode('UTF-8') ++ for sym in opts.required]) ++ optional_symbols = frozenset([sym.encode('UTF-8') ++ for sym in opts.optional]) ++ check_symbols = required_symbols | optional_symbols ++ ++ # Tracks the symbols in check_symbols that have been found. ++ symbols_found = set() ++ ++ # Discover the extent of the RELRO segment. ++ relro_begin, relro_end = find_relro(opts.object, img) ++ symbol_table_found = False ++ ++ errors = False ++ def error(msg: str) -> None: ++ """Record an error condition and write a message to standard output.""" ++ nonlocal errors ++ errors = True ++ sys.stdout.write('{}: error: {}\n'.format(opts.object, msg)) ++ ++ # Iterate over section headers to find the symbol table. ++ for shdr in img.shdrs(): ++ if shdr.sh_type == glibcelf.Sht.SHT_SYMTAB: ++ symbol_table_found = True ++ for sym in img.syms(shdr): ++ if sym.st_name in check_symbols: ++ symbols_found.add(sym.st_name) ++ ++ # Validate symbol type, section, and size. ++ if sym.st_info.type != glibcelf.Stt.STT_OBJECT: ++ error('symbol {!r} has wrong type {}'.format( ++ sym.st_name.decode('UTF-8'), sym.st_info.type)) ++ if sym.st_shndx in glibcelf.Shn: ++ error('symbol {!r} has reserved section {}'.format( ++ sym.st_name.decode('UTF-8'), sym.st_shndx)) ++ continue ++ if sym.st_size == 0: ++ error('symbol {!r} has size zero'.format( ++ sym.st_name.decode('UTF-8'))) ++ continue ++ ++ check_in_relro('symbol', relro_begin, relro_end, ++ sym.st_name, sym.st_value, sym.st_size, ++ error) ++ continue # SHT_SYMTAB ++ if shdr.sh_name == b'.data.rel.ro' \ ++ or shdr.sh_name.startswith(b'.data.rel.ro.'): ++ check_in_relro('section', relro_begin, relro_end, ++ shdr.sh_name, shdr.sh_addr, shdr.sh_size, ++ error) ++ continue ++ ++ if required_symbols - symbols_found: ++ for sym in sorted(required_symbols - symbols_found): ++ error('symbol {!r} not found'.format(sym.decode('UTF-8'))) ++ ++ if errors: ++ sys.exit(1) ++ ++ if not symbol_table_found: ++ sys.stdout.write( ++ '{}: warning: no symbol table found (stripped object)\n'.format( ++ opts.object)) ++ sys.exit(77) ++ ++if __name__ == '__main__': ++ main(sys.argv[1:]) +diff --git a/manual/install.texi b/manual/install.texi +index 816b77a0a25a88a7..36a5af62bc5722b0 100644 +--- a/manual/install.texi ++++ b/manual/install.texi +@@ -117,6 +117,12 @@ problem and suppress these constructs, so that the library will still be + usable, but functionality may be lost---for example, you can't build a + shared libc with old binutils. + ++@item --with-default-link=@var{FLAG} ++With @code{--with-default-link=yes}, the build system does not use a ++custom linker script for linking shared objects. The default for ++@var{FLAG} is the opposite, @samp{no}, because the custom linker script ++is needed for full RELRO protection. ++ + @item --with-nonshared-cflags=@var{cflags} + Use additional compiler flags @var{cflags} to build the parts of the + library which are always statically linked into applications and +diff --git a/sysdeps/unix/sysv/linux/ia64/Makefile b/sysdeps/unix/sysv/linux/ia64/Makefile +index da85ba43e2d0ddef..c5cc41b3677d4a2a 100644 +--- a/sysdeps/unix/sysv/linux/ia64/Makefile ++++ b/sysdeps/unix/sysv/linux/ia64/Makefile +@@ -1,3 +1,9 @@ ++ifeq ($(subdir),elf) ++# ia64 does not support PT_GNU_RELRO. ++test-xfail-tst-relro-ldso = yes ++test-xfail-tst-relro-libc = yes ++endif ++ + ifeq ($(subdir),misc) + sysdep_headers += sys/rse.h + endif diff --git a/glibc-upstream-2.34-169.patch b/glibc-upstream-2.34-169.patch new file mode 100644 index 0000000..63cb452 --- /dev/null +++ b/glibc-upstream-2.34-169.patch @@ -0,0 +1,87 @@ +commit ca0faa140ff8cebe4c041d935f0f5eb480873d99 +Author: Joan Bruguera +Date: Mon Apr 11 19:49:56 2022 +0200 + + misc: Fix rare fortify crash on wchar funcs. [BZ 29030] + + If `__glibc_objsize (__o) == (size_t) -1` (i.e. `__o` is unknown size), fortify + checks should pass, and `__whatever_alias` should be called. + + Previously, `__glibc_objsize (__o) == (size_t) -1` was explicitly checked, but + on commit a643f60c53876b, this was moved into `__glibc_safe_or_unknown_len`. + + A comment says the -1 case should work as: "The -1 check is redundant because + since it implies that __glibc_safe_len_cond is true.". But this fails when: + * `__s > 1` + * `__osz == -1` (i.e. unknown size at compile time) + * `__l` is big enough + * `__l * __s <= __osz` can be folded to a constant + (I only found this to be true for `mbsrtowcs` and other functions in wchar2.h) + + In this case `__l * __s <= __osz` is false, and `__whatever_chk_warn` will be + called by `__glibc_fortify` or `__glibc_fortify_n` and crash the program. + + This commit adds the explicit `__osz == -1` check again. + moc crashes on startup due to this, see: https://bugs.archlinux.org/task/74041 + + Minimal test case (test.c): + #include + + int main (void) + { + const char *hw = "HelloWorld"; + mbsrtowcs (NULL, &hw, (size_t)-1, NULL); + return 0; + } + + Build with: + gcc -O2 -Wp,-D_FORTIFY_SOURCE=2 test.c -o test && ./test + + Output: + *** buffer overflow detected ***: terminated + + Fixes: BZ #29030 + Signed-off-by: Joan Bruguera + Signed-off-by: Siddhesh Poyarekar + (cherry picked from commit 33e03f9cd2be4f2cd62f93fda539cc07d9c8130e) + +diff --git a/debug/tst-fortify.c b/debug/tst-fortify.c +index 8b5902423cf0ad88..fb02452f5993c594 100644 +--- a/debug/tst-fortify.c ++++ b/debug/tst-fortify.c +@@ -1505,6 +1505,11 @@ do_test (void) + CHK_FAIL_END + #endif + ++ /* Bug 29030 regresion check */ ++ cp = "HelloWorld"; ++ if (mbsrtowcs (NULL, &cp, (size_t)-1, &s) != 10) ++ FAIL (); ++ + cp = "A"; + if (mbstowcs (wenough, cp, 10) != 1 + || wcscmp (wenough, L"A") != 0) +diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h +index 515fb681a0547217..b36013b9a6b4d9c3 100644 +--- a/misc/sys/cdefs.h ++++ b/misc/sys/cdefs.h +@@ -161,13 +161,13 @@ + || (__builtin_constant_p (__l) && (__l) > 0)) + + /* Length is known to be safe at compile time if the __L * __S <= __OBJSZ +- condition can be folded to a constant and if it is true. The -1 check is +- redundant because since it implies that __glibc_safe_len_cond is true. */ ++ condition can be folded to a constant and if it is true, or unknown (-1) */ + #define __glibc_safe_or_unknown_len(__l, __s, __osz) \ +- (__glibc_unsigned_or_positive (__l) \ +- && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \ +- __s, __osz)) \ +- && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz)) ++ ((__osz) == (__SIZE_TYPE__) -1 \ ++ || (__glibc_unsigned_or_positive (__l) \ ++ && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \ ++ (__s), (__osz))) \ ++ && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), (__s), (__osz)))) + + /* Conversely, we know at compile time that the length is unsafe if the + __L * __S <= __OBJSZ condition can be folded to a constant and if it is diff --git a/glibc-upstream-2.34-170.patch b/glibc-upstream-2.34-170.patch new file mode 100644 index 0000000..11aa68c --- /dev/null +++ b/glibc-upstream-2.34-170.patch @@ -0,0 +1,49 @@ +commit 0d477e92c49db2906b32e44135b98746ccc73c7b +Author: Florian Weimer +Date: Tue Apr 26 14:22:10 2022 +0200 + + INSTALL: Rephrase -with-default-link documentation + + Reviewed-by: Carlos O'Donell + (cherry picked from commit c935789bdf40ba22b5698da869d3a4789797e09f) + +diff --git a/INSTALL b/INSTALL +index 60d01568d77645c7..10a3dcdc0a8db665 100644 +--- a/INSTALL ++++ b/INSTALL +@@ -90,10 +90,10 @@ if 'CFLAGS' is specified it must enable optimization. For example: + library will still be usable, but functionality may be lost--for + example, you can't build a shared libc with old binutils. + +-'--with-default-link=FLAG' +- With '--with-default-link=yes', the build system does not use a +- custom linker script for linking shared objects. The default for +- FLAG is the opposite, 'no', because the custom linker script is ++'--with-default-link' ++ With '--with-default-link', the build system does not use a custom ++ linker script for linking shared objects. The default is ++ '--without-default-link', because the custom linker script is + needed for full RELRO protection. + + '--with-nonshared-cflags=CFLAGS' +diff --git a/manual/install.texi b/manual/install.texi +index 36a5af62bc5722b0..8e34ff7e1847f3ae 100644 +--- a/manual/install.texi ++++ b/manual/install.texi +@@ -117,11 +117,11 @@ problem and suppress these constructs, so that the library will still be + usable, but functionality may be lost---for example, you can't build a + shared libc with old binutils. + +-@item --with-default-link=@var{FLAG} +-With @code{--with-default-link=yes}, the build system does not use a +-custom linker script for linking shared objects. The default for +-@var{FLAG} is the opposite, @samp{no}, because the custom linker script +-is needed for full RELRO protection. ++@item --with-default-link ++With @code{--with-default-link}, the build system does not use a custom ++linker script for linking shared objects. The default is ++@code{--without-default-link}, because the custom linker script is ++needed for full RELRO protection. + + @item --with-nonshared-cflags=@var{cflags} + Use additional compiler flags @var{cflags} to build the parts of the diff --git a/glibc-upstream-2.34-171.patch b/glibc-upstream-2.34-171.patch new file mode 100644 index 0000000..04e6898 --- /dev/null +++ b/glibc-upstream-2.34-171.patch @@ -0,0 +1,377 @@ +commit bc56ab1f4aa937665034373d3e320d0779a839aa +Author: Florian Weimer +Date: Tue Apr 26 14:23:02 2022 +0200 + + dlfcn: Do not use rtld_active () to determine ld.so state (bug 29078) + + When audit modules are loaded, ld.so initialization is not yet + complete, and rtld_active () returns false even though ld.so is + mostly working. Instead, the static dlopen hook is used, but that + does not work at all because this is not a static dlopen situation. + + Commit 466c1ea15f461edb8e3ffaf5d86d708876343bbf ("dlfcn: Rework + static dlopen hooks") moved the hook pointer into _rtld_global_ro, + which means that separate protection is not needed anymore and the + hook pointer can be checked directly. + + The guard for disabling libio vtable hardening in _IO_vtable_check + should stay for now. + + Fixes commit 8e1472d2c1e25e6eabc2059170731365f6d5b3d1 ("ld.so: + Examine GLRO to detect inactive loader [BZ #20204]"). + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 8dcb6d0af07fda3607b541857e4f3970a74ed55b) + +diff --git a/dlfcn/dladdr.c b/dlfcn/dladdr.c +index 1cc305f0c46e7c3b..0d07ae1cd4dbb7a2 100644 +--- a/dlfcn/dladdr.c ++++ b/dlfcn/dladdr.c +@@ -24,7 +24,7 @@ int + __dladdr (const void *address, Dl_info *info) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dladdr (address, info); + #endif + return _dl_addr (address, info, NULL, NULL); +diff --git a/dlfcn/dladdr1.c b/dlfcn/dladdr1.c +index 78560dbac208c316..93ce68c1d6067fe2 100644 +--- a/dlfcn/dladdr1.c ++++ b/dlfcn/dladdr1.c +@@ -24,7 +24,7 @@ int + __dladdr1 (const void *address, Dl_info *info, void **extra, int flags) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dladdr1 (address, info, extra, flags); + #endif + +diff --git a/dlfcn/dlclose.c b/dlfcn/dlclose.c +index 6a013a81bb648191..07ecb21bf7d43be4 100644 +--- a/dlfcn/dlclose.c ++++ b/dlfcn/dlclose.c +@@ -24,7 +24,7 @@ int + __dlclose (void *handle) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlclose (handle); + #endif + +diff --git a/dlfcn/dlerror.c b/dlfcn/dlerror.c +index 5047b140662bc33e..63da79c63000eef0 100644 +--- a/dlfcn/dlerror.c ++++ b/dlfcn/dlerror.c +@@ -32,7 +32,7 @@ char * + __dlerror (void) + { + # ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlerror (); + # endif + +diff --git a/dlfcn/dlinfo.c b/dlfcn/dlinfo.c +index c6f9a1da09ff8622..47d2daa96fa5986f 100644 +--- a/dlfcn/dlinfo.c ++++ b/dlfcn/dlinfo.c +@@ -89,7 +89,7 @@ dlinfo_implementation (void *handle, int request, void *arg) + int + ___dlinfo (void *handle, int request, void *arg) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlinfo (handle, request, arg); + else + return dlinfo_implementation (handle, request, arg); +diff --git a/dlfcn/dlmopen.c b/dlfcn/dlmopen.c +index c171c8953da20fc7..2309224eb8484b1a 100644 +--- a/dlfcn/dlmopen.c ++++ b/dlfcn/dlmopen.c +@@ -80,7 +80,7 @@ dlmopen_implementation (Lmid_t nsid, const char *file, int mode, + void * + ___dlmopen (Lmid_t nsid, const char *file, int mode) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlmopen (nsid, file, mode, RETURN_ADDRESS (0)); + else + return dlmopen_implementation (nsid, file, mode, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlopen.c b/dlfcn/dlopen.c +index e04b374b82b04337..9c59c751c4eaf7a7 100644 +--- a/dlfcn/dlopen.c ++++ b/dlfcn/dlopen.c +@@ -75,7 +75,7 @@ dlopen_implementation (const char *file, int mode, void *dl_caller) + void * + ___dlopen (const char *file, int mode) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0)); + else + return dlopen_implementation (file, mode, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlopenold.c b/dlfcn/dlopenold.c +index 9115501ac121eeca..c2f2a42194d50953 100644 +--- a/dlfcn/dlopenold.c ++++ b/dlfcn/dlopenold.c +@@ -70,7 +70,7 @@ __dlopen_nocheck (const char *file, int mode) + mode |= RTLD_LAZY; + args.mode = mode; + +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0)); + + return _dlerror_run (dlopen_doit, &args) ? NULL : args.new; +diff --git a/dlfcn/dlsym.c b/dlfcn/dlsym.c +index 43044cf7bb95801e..d3861170a7631d01 100644 +--- a/dlfcn/dlsym.c ++++ b/dlfcn/dlsym.c +@@ -62,7 +62,7 @@ dlsym_implementation (void *handle, const char *name, void *dl_caller) + void * + ___dlsym (void *handle, const char *name) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlsym (handle, name, RETURN_ADDRESS (0)); + else + return dlsym_implementation (handle, name, RETURN_ADDRESS (0)); +diff --git a/dlfcn/dlvsym.c b/dlfcn/dlvsym.c +index 9b76f9afa513e11f..3af02109c306b800 100644 +--- a/dlfcn/dlvsym.c ++++ b/dlfcn/dlvsym.c +@@ -65,7 +65,7 @@ dlvsym_implementation (void *handle, const char *name, const char *version, + void * + ___dlvsym (void *handle, const char *name, const char *version) + { +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->dlvsym (handle, name, version, + RETURN_ADDRESS (0)); + else +diff --git a/elf/Makefile b/elf/Makefile +index fec6e23b5b625e3b..c89a6a58690646ee 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -376,6 +376,7 @@ tests += \ + tst-audit24d \ + tst-audit25a \ + tst-audit25b \ ++ tst-audit26 \ + tst-auditmany \ + tst-auxobj \ + tst-auxobj-dlopen \ +@@ -721,6 +722,7 @@ modules-names = \ + tst-auditmod24c \ + tst-auditmod24d \ + tst-auditmod25 \ ++ tst-auditmod26 \ + tst-auxvalmod \ + tst-big-note-lib \ + tst-deep1mod1 \ +@@ -2194,6 +2196,10 @@ $(objpfx)tst-audit25b: $(objpfx)tst-audit25mod1.so \ + LDFLAGS-tst-audit25b = -Wl,-z,now + tst-audit25b-ARGS = -- $(host-test-program-cmd) + ++$(objpfx)tst-audit26.out: $(objpfx)tst-auditmod26.so ++$(objpfx)tst-auditmod26.so: $(libsupport) ++tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so ++ + # tst-sonamemove links against an older implementation of the library. + LDFLAGS-tst-sonamemove-linkmod1.so = \ + -Wl,--version-script=tst-sonamemove-linkmod1.map \ +diff --git a/elf/dl-libc.c b/elf/dl-libc.c +index d5bc4a277f4c6ef3..db4342a3256921f0 100644 +--- a/elf/dl-libc.c ++++ b/elf/dl-libc.c +@@ -157,7 +157,7 @@ __libc_dlopen_mode (const char *name, int mode) + args.caller_dlopen = RETURN_ADDRESS (0); + + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlopen_mode (name, mode); + #endif + return dlerror_run (do_dlopen, &args) ? NULL : (void *) args.map; +@@ -185,7 +185,7 @@ __libc_dlsym (void *map, const char *name) + args.name = name; + + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlsym (map, name); + #endif + return (dlerror_run (do_dlsym, &args) ? NULL +@@ -199,7 +199,7 @@ void * + __libc_dlvsym (void *map, const char *name, const char *version) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlvsym (map, name, version); + #endif + +@@ -222,7 +222,7 @@ int + __libc_dlclose (void *map) + { + #ifdef SHARED +- if (!rtld_active ()) ++ if (GLRO (dl_dlfcn_hook) != NULL) + return GLRO (dl_dlfcn_hook)->libc_dlclose (map); + #endif + return dlerror_run (do_dlclose, map); +diff --git a/elf/tst-audit26.c b/elf/tst-audit26.c +new file mode 100644 +index 0000000000000000..3f920e83bac247a5 +--- /dev/null ++++ b/elf/tst-audit26.c +@@ -0,0 +1,35 @@ ++/* Check the usability of functions in audit modules. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ /* Check that the audit module has been loaded. */ ++ void *handle = xdlopen ("mapped to libc", RTLD_LOCAL | RTLD_NOW); ++ TEST_VERIFY (handle ++ == xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD)); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-auditmod26.c b/elf/tst-auditmod26.c +new file mode 100644 +index 0000000000000000..db7ba95abec20f53 +--- /dev/null ++++ b/elf/tst-auditmod26.c +@@ -0,0 +1,104 @@ ++/* Check the usability of functions in audit modules. Audit module. ++ Copyright (C) 2022 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++unsigned int ++la_version (unsigned int current) ++{ ++ /* Exercise various functions. */ ++ ++ /* Check dlopen, dlsym, dlclose. */ ++ void *handle = xdlopen (LIBM_SO, RTLD_LOCAL | RTLD_NOW); ++ void *ptr = xdlsym (handle, "sincos"); ++ TEST_VERIFY (ptr != NULL); ++ ptr = dlsym (handle, "SINCOS"); ++ TEST_VERIFY (ptr == NULL); ++ const char *message = dlerror (); ++ TEST_VERIFY (strstr (message, ": undefined symbol: SINCOS") != NULL); ++ ptr = dlsym (handle, "SINCOS"); ++ TEST_VERIFY (ptr == NULL); ++ xdlclose (handle); ++ TEST_COMPARE_STRING (dlerror (), NULL); ++ ++ handle = xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD); ++ ++ /* Check dlvsym. _exit is unlikely to gain another symbol ++ version. */ ++ TEST_VERIFY (xdlsym (handle, "_exit") ++ == xdlvsym (handle, "_exit", FIRST_VERSION_libc__exit_STRING)); ++ ++ /* Check dlinfo. */ ++ { ++ void *handle2 = NULL; ++ TEST_COMPARE (dlinfo (handle, RTLD_DI_LINKMAP, &handle2), 0); ++ TEST_VERIFY (handle2 == handle); ++ } ++ ++ /* Check dladdr and dladdr1. */ ++ Dl_info info = { }; ++ TEST_VERIFY (dladdr (&_exit, &info) != 0); ++ if (strcmp (info.dli_sname, "_Exit") != 0) /* _Exit is an alias. */ ++ TEST_COMPARE_STRING (info.dli_sname, "_exit"); ++ TEST_VERIFY (info.dli_saddr == &_exit); ++ TEST_VERIFY (strstr (info.dli_fname, LIBC_SO)); ++ void *extra_info; ++ memset (&info, 0, sizeof (info)); ++ TEST_VERIFY (dladdr1 (&_exit, &info, &extra_info, RTLD_DL_LINKMAP) != 0); ++ TEST_VERIFY (extra_info == handle); ++ ++ /* Verify that dlmopen creates a new namespace. */ ++ void *dlmopen_handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ TEST_VERIFY (dlmopen_handle != handle); ++ memset (&info, 0, sizeof (info)); ++ extra_info = NULL; ++ ptr = xdlsym (dlmopen_handle, "_exit"); ++ TEST_VERIFY (dladdr1 (ptr, &info, &extra_info, RTLD_DL_LINKMAP) != 0); ++ TEST_VERIFY (extra_info == dlmopen_handle); ++ xdlclose (dlmopen_handle); ++ ++ /* Terminate the process with an error state. This does not happen ++ automatically because the audit module state is not shared with ++ the main program. */ ++ if (support_record_failure_is_failed ()) ++ { ++ fflush (stdout); ++ fflush (stderr); ++ _exit (1); ++ } ++ ++ return LAV_CURRENT; ++} ++ ++char * ++la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag) ++{ ++ if (strcmp (name, "mapped to libc") == 0) ++ return (char *) LIBC_SO; ++ else ++ return (char *) name; ++} diff --git a/glibc-upstream-2.34-172.patch b/glibc-upstream-2.34-172.patch new file mode 100644 index 0000000..06dc695 --- /dev/null +++ b/glibc-upstream-2.34-172.patch @@ -0,0 +1,28 @@ +commit 83cc145830bdbefdabe03787ed884d548bea9c99 +Author: Florian Weimer +Date: Fri Apr 22 19:34:52 2022 +0200 + + scripts/glibcelf.py: Mark as UNSUPPORTED on Python 3.5 and earlier + + enum.IntFlag and enum.EnumMeta._missing_ support are not part of + earlier Python versions. + + (cherry picked from commit b571f3adffdcbed23f35ea39b0ca43809dbb4f5b) + +diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py +index 8f7d0ca184845714..da0d5380f33a195e 100644 +--- a/scripts/glibcelf.py ++++ b/scripts/glibcelf.py +@@ -28,6 +28,12 @@ import collections + import enum + import struct + ++if not hasattr(enum, 'IntFlag'): ++ import sys ++ sys.stdout.write( ++ 'warning: glibcelf.py needs Python 3.6 for enum support\n') ++ sys.exit(77) ++ + class _OpenIntEnum(enum.IntEnum): + """Integer enumeration that supports arbitrary int values.""" + @classmethod diff --git a/glibc-upstream-2.34-173.patch b/glibc-upstream-2.34-173.patch new file mode 100644 index 0000000..69a92b8 --- /dev/null +++ b/glibc-upstream-2.34-173.patch @@ -0,0 +1,254 @@ +commit 16245986fb9bfe396113fc7dfd1929f69a9e748e +Author: H.J. Lu +Date: Fri Aug 20 06:42:24 2021 -0700 + + x86-64: Optimize load of all bits set into ZMM register [BZ #28252] + + Optimize loads of all bits set into ZMM register in AVX512 SVML codes + by replacing + + vpbroadcastq .L_2il0floatpacket.16(%rip), %zmmX + + and + + vmovups .L_2il0floatpacket.13(%rip), %zmmX + + with + vpternlogd $0xff, %zmmX, %zmmX, %zmmX + + This fixes BZ #28252. + + (cherry picked from commit 78c9ec9000f873abe7a15a91b87080a2e4308260) + +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +index e68fcdbb16a79f36..58e588a3d42a8bc9 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S +@@ -265,7 +265,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos + vmovaps %zmm0, %zmm8 + + /* Check for large arguments path */ +- vpbroadcastq .L_2il0floatpacket.16(%rip), %zmm2 ++ vpternlogd $0xff, %zmm2, %zmm2, %zmm2 + + /* + ARGUMENT RANGE REDUCTION: +@@ -456,8 +456,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_cos_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.16: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.16,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +index dfa2acafc486b56b..f5f117d474f66176 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S +@@ -274,7 +274,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log + + /* preserve mantissa, set input exponent to 2^(-10) */ + vpternlogq $248, _ExpMask(%rax), %zmm3, %zmm2 +- vpbroadcastq .L_2il0floatpacket.12(%rip), %zmm1 ++ vpternlogd $0xff, %zmm1, %zmm1, %zmm1 + vpsrlq $32, %zmm4, %zmm6 + + /* reciprocal approximation good to at least 11 bits */ +@@ -461,8 +461,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_log_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.12: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.12,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +index be8ab7c6e0e33819..48d251db16ccab9d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S +@@ -261,7 +261,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin + andq $-64, %rsp + subq $1280, %rsp + movq __svml_d_trig_data@GOTPCREL(%rip), %rax +- vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14 ++ vpternlogd $0xff, %zmm1, %zmm1, %zmm14 + vmovups __dAbsMask(%rax), %zmm7 + vmovups __dInvPI(%rax), %zmm2 + vmovups __dRShifter(%rax), %zmm1 +@@ -458,8 +458,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin + jmp .LBL_2_7 + #endif + END (_ZGVeN8v_sin_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.14: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.14,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +index 611887082a545854..a4944a4feef6aa98 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S +@@ -430,7 +430,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos + + /* SinPoly = SinR*SinPoly */ + vfmadd213pd %zmm5, %zmm5, %zmm4 +- vpbroadcastq .L_2il0floatpacket.15(%rip), %zmm3 ++ vpternlogd $0xff, %zmm3, %zmm3, %zmm3 + + /* Update Cos result's sign */ + vxorpd %zmm2, %zmm1, %zmm1 +@@ -741,8 +741,3 @@ END (_ZGVeN8vvv_sincos_knl) + ENTRY (_ZGVeN8vvv_sincos_skx) + WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx + END (_ZGVeN8vvv_sincos_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.15: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.15,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +index f671d60d5dab5a0e..fe8474fed943e8ad 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S +@@ -278,7 +278,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf + X = X - Y*PI1 - Y*PI2 - Y*PI3 + */ + vmovaps %zmm0, %zmm6 +- vmovups .L_2il0floatpacket.13(%rip), %zmm12 ++ vpternlogd $0xff, %zmm12, %zmm12, %zmm12 + vmovups __sRShifter(%rax), %zmm3 + vmovups __sPI1_FMA(%rax), %zmm5 + vmovups __sA9_FMA(%rax), %zmm9 +@@ -453,8 +453,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf + jmp .LBL_2_7 + #endif + END (_ZGVeN16v_cosf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.13: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.13,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +index 637bfe3c06ab9ad4..229b7828cde04db2 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S +@@ -264,7 +264,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf + vmovaps %zmm0, %zmm7 + + /* compare against threshold */ +- vmovups .L_2il0floatpacket.13(%rip), %zmm3 ++ vpternlogd $0xff, %zmm3, %zmm3, %zmm3 + vmovups __sInvLn2(%rax), %zmm4 + vmovups __sShifter(%rax), %zmm1 + vmovups __sLn2hi(%rax), %zmm6 +@@ -440,8 +440,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf + + #endif + END (_ZGVeN16v_expf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.13: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.13,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +index 9d790fbf0ad6c8ec..fa2aae986f543582 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S +@@ -235,7 +235,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf + andq $-64, %rsp + subq $1280, %rsp + movq __svml_slog_data@GOTPCREL(%rip), %rax +- vmovups .L_2il0floatpacket.7(%rip), %zmm6 ++ vpternlogd $0xff, %zmm6, %zmm6, %zmm6 + vmovups _iBrkValue(%rax), %zmm4 + vmovups _sPoly_7(%rax), %zmm8 + +@@ -409,8 +409,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf + + #endif + END (_ZGVeN16v_logf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.7: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.7,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +index c5c43c46ff7af5a3..6aea2a4f11d1f85f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S +@@ -385,7 +385,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf + vpsrlq $32, %zmm3, %zmm2 + vpmovqd %zmm2, %ymm11 + vcvtps2pd %ymm14, %zmm13 +- vmovups .L_2il0floatpacket.23(%rip), %zmm14 ++ vpternlogd $0xff, %zmm14, %zmm14, %zmm14 + vmovaps %zmm14, %zmm26 + vpandd _ABSMASK(%rax), %zmm1, %zmm8 + vpcmpd $1, _INF(%rax), %zmm8, %k2 +@@ -427,7 +427,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf + vpmovqd %zmm11, %ymm5 + vpxord %zmm10, %zmm10, %zmm10 + vgatherdpd _Log2Rcp_lookup(%rax,%ymm4), %zmm10{%k3} +- vpbroadcastq .L_2il0floatpacket.24(%rip), %zmm4 ++ vpternlogd $0xff, %zmm4, %zmm4, %zmm4 + vpxord %zmm11, %zmm11, %zmm11 + vcvtdq2pd %ymm7, %zmm7 + vgatherdpd _Log2Rcp_lookup(%rax,%ymm5), %zmm11{%k1} +@@ -643,11 +643,3 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf + jmp .LBL_2_7 + #endif + END (_ZGVeN16vv_powf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.23: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.23,@object +-.L_2il0floatpacket.24: +- .long 0xffffffff,0xffffffff +- .type .L_2il0floatpacket.24,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S +index 9cf359c86ff9bd70..a446c504f63c9399 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S +@@ -317,7 +317,7 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf + + /* Result sign calculations */ + vpternlogd $150, %zmm0, %zmm14, %zmm1 +- vmovups .L_2il0floatpacket.13(%rip), %zmm14 ++ vpternlogd $0xff, %zmm14, %zmm14, %zmm14 + + /* Add correction term 0.5 for cos() part */ + vaddps %zmm8, %zmm5, %zmm15 +@@ -748,8 +748,3 @@ END (_ZGVeN16vvv_sincosf_knl) + ENTRY (_ZGVeN16vvv_sincosf_skx) + WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx + END (_ZGVeN16vvv_sincosf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.13: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.13,@object +diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S +index bd05109a62181f22..c1b352d0ad1992cd 100644 +--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S ++++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S +@@ -280,7 +280,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf + movq __svml_s_trig_data@GOTPCREL(%rip), %rax + + /* Check for large and special values */ +- vmovups .L_2il0floatpacket.11(%rip), %zmm14 ++ vpternlogd $0xff, %zmm14, %zmm14, %zmm14 + vmovups __sAbsMask(%rax), %zmm5 + vmovups __sInvPI(%rax), %zmm1 + vmovups __sRShifter(%rax), %zmm2 +@@ -472,8 +472,3 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf + jmp .LBL_2_7 + #endif + END (_ZGVeN16v_sinf_skx) +- +- .section .rodata, "a" +-.L_2il0floatpacket.11: +- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff +- .type .L_2il0floatpacket.11,@object diff --git a/glibc-upstream-2.34-174.patch b/glibc-upstream-2.34-174.patch new file mode 100644 index 0000000..3bf44a8 --- /dev/null +++ b/glibc-upstream-2.34-174.patch @@ -0,0 +1,42 @@ +commit b5a44a6a471aafd3677659a610f32468c40a666b +Author: Noah Goldstein +Date: Tue Sep 21 18:31:49 2021 -0500 + + x86: Modify ENTRY in sysdep.h so that p2align can be specified + + No bug. + + This change adds a new macro ENTRY_P2ALIGN which takes a second + argument, log2 of the desired function alignment. + + The old ENTRY(name) macro is just ENTRY_P2ALIGN(name, 4) so this + doesn't affect any existing functionality. + + Signed-off-by: Noah Goldstein + (cherry picked from commit fc5bd179ef3a953dff8d1655bd530d0e230ffe71) + +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index cac1d762fb3f99d0..937180c1bd791570 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -78,15 +78,18 @@ enum cf_protection_level + #define ASM_SIZE_DIRECTIVE(name) .size name,.-name; + + /* Define an entry point visible from C. */ +-#define ENTRY(name) \ ++#define ENTRY_P2ALIGN(name, alignment) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),@function; \ +- .align ALIGNARG(4); \ ++ .align ALIGNARG(alignment); \ + C_LABEL(name) \ + cfi_startproc; \ + _CET_ENDBR; \ + CALL_MCOUNT + ++/* Common entry 16 byte aligns. */ ++#define ENTRY(name) ENTRY_P2ALIGN (name, 4) ++ + #undef END + #define END(name) \ + cfi_endproc; \ diff --git a/glibc-upstream-2.34-175.patch b/glibc-upstream-2.34-175.patch new file mode 100644 index 0000000..5ebf0b7 --- /dev/null +++ b/glibc-upstream-2.34-175.patch @@ -0,0 +1,653 @@ +commit 5ec3416853c4150c4d13312e05f93a053586d528 +Author: Noah Goldstein +Date: Tue Sep 21 18:45:03 2021 -0500 + + x86: Optimize memcmp-evex-movbe.S for frontend behavior and size + + No bug. + + The frontend optimizations are to: + 1. Reorganize logically connected basic blocks so they are either in + the same cache line or adjacent cache lines. + 2. Avoid cases when basic blocks unnecissarily cross cache lines. + 3. Try and 32 byte align any basic blocks possible without sacrificing + code size. Smaller / Less hot basic blocks are used for this. + + Overall code size shrunk by 168 bytes. This should make up for any + extra costs due to aligning to 64 bytes. + + In general performance before deviated a great deal dependending on + whether entry alignment % 64 was 0, 16, 32, or 48. These changes + essentially make it so that the current implementation is at least + equal to the best alignment of the original for any arguments. + + The only additional optimization is in the page cross case. Branch on + equals case was removed from the size == [4, 7] case. As well the [4, + 7] and [2, 3] case where swapped as [4, 7] is likely a more hot + argument size. + + test-memcmp and test-wmemcmp are both passing. + + (cherry picked from commit 1bd8b8d58fc9967cc073d2c13bfb6befefca2faa) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +index 654dc7ac8ccb9445..2761b54f2e7dea9f 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +@@ -34,7 +34,24 @@ + area. + 7. Use 2 vector compares when size is 2 * CHAR_PER_VEC or less. + 8. Use 4 vector compares when size is 4 * CHAR_PER_VEC or less. +- 9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less. */ ++ 9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less. ++ ++When possible the implementation tries to optimize for frontend in the ++following ways: ++Throughput: ++ 1. All code sections that fit are able to run optimally out of the ++ LSD. ++ 2. All code sections that fit are able to run optimally out of the ++ DSB ++ 3. Basic blocks are contained in minimum number of fetch blocks ++ necessary. ++ ++Latency: ++ 1. Logically connected basic blocks are put in the same ++ cache-line. ++ 2. Logically connected basic blocks that do not fit in the same ++ cache-line are put in adjacent lines. This can get beneficial ++ L2 spatial prefetching and L1 next-line prefetching. */ + + # include + +@@ -47,9 +64,11 @@ + # ifdef USE_AS_WMEMCMP + # define CHAR_SIZE 4 + # define VPCMP vpcmpd ++# define VPTEST vptestmd + # else + # define CHAR_SIZE 1 + # define VPCMP vpcmpub ++# define VPTEST vptestmb + # endif + + # define VEC_SIZE 32 +@@ -75,7 +94,9 @@ + */ + + .section .text.evex,"ax",@progbits +-ENTRY (MEMCMP) ++/* Cache align memcmp entry. This allows for much more thorough ++ frontend optimization. */ ++ENTRY_P2ALIGN (MEMCMP, 6) + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +@@ -89,7 +110,7 @@ ENTRY (MEMCMP) + VPCMP $4, (%rdi), %YMM1, %k1 + kmovd %k1, %eax + /* NB: eax must be destination register if going to +- L(return_vec_[0,2]). For L(return_vec_3 destination register ++ L(return_vec_[0,2]). For L(return_vec_3) destination register + must be ecx. */ + testl %eax, %eax + jnz L(return_vec_0) +@@ -121,10 +142,6 @@ ENTRY (MEMCMP) + testl %ecx, %ecx + jnz L(return_vec_3) + +- /* Zero YMM0. 4x VEC reduction is done with vpxor + vtern so +- compare with zero to get a mask is needed. */ +- vpxorq %XMM0, %XMM0, %XMM0 +- + /* Go to 4x VEC loop. */ + cmpq $(CHAR_PER_VEC * 8), %rdx + ja L(more_8x_vec) +@@ -148,47 +165,61 @@ ENTRY (MEMCMP) + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3 +- /* Or together YMM1, YMM2, and YMM3 into YMM3. */ +- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + /* Ternary logic to xor (VEC_SIZE * 3)(%rdi) with YMM4 while +- oring with YMM3. Result is stored in YMM4. */ +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4 +- /* Compare YMM4 with 0. If any 1s s1 and s2 don't match. */ +- VPCMP $4, %YMM4, %YMM0, %k1 ++ oring with YMM1. Result is stored in YMM4. */ ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ ++ /* Or together YMM2, YMM3, and YMM4 into YMM4. */ ++ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 ++ ++ /* Test YMM4 against itself. Store any CHAR mismatches in k1. ++ */ ++ VPTEST %YMM4, %YMM4, %k1 ++ /* k1 must go to ecx for L(return_vec_0_1_2_3). */ + kmovd %k1, %ecx + testl %ecx, %ecx + jnz L(return_vec_0_1_2_3) + /* NB: eax must be zero to reach here. */ + ret + +- /* NB: aligning 32 here allows for the rest of the jump targets +- to be tuned for 32 byte alignment. Most important this ensures +- the L(more_8x_vec) loop is 32 byte aligned. */ +- .p2align 5 +-L(less_vec): +- /* Check if one or less CHAR. This is necessary for size = 0 but +- is also faster for size = CHAR_SIZE. */ +- cmpl $1, %edx +- jbe L(one_or_less) ++ .p2align 4 ++L(8x_end_return_vec_0_1_2_3): ++ movq %rdx, %rdi ++L(8x_return_vec_0_1_2_3): ++ addq %rdi, %rsi ++L(return_vec_0_1_2_3): ++ VPTEST %YMM1, %YMM1, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(return_vec_0) + +- /* Check if loading one VEC from either s1 or s2 could cause a +- page cross. This can have false positives but is by far the +- fastest method. */ +- movl %edi, %eax +- orl %esi, %eax +- andl $(PAGE_SIZE - 1), %eax +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jg L(page_cross_less_vec) ++ VPTEST %YMM2, %YMM2, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(return_vec_1) + +- /* No page cross possible. */ +- VMOVU (%rsi), %YMM2 +- VPCMP $4, (%rdi), %YMM2, %k1 +- kmovd %k1, %eax +- /* Create mask in ecx for potentially in bound matches. */ +- bzhil %edx, %eax, %eax +- jnz L(return_vec_0) ++ VPTEST %YMM3, %YMM3, %k0 ++ kmovd %k0, %eax ++ testl %eax, %eax ++ jnz L(return_vec_2) ++L(return_vec_3): ++ /* bsf saves 1 byte from tzcnt. This keep L(return_vec_3) in one ++ fetch block and the entire L(*return_vec_0_1_2_3) in 1 cache ++ line. */ ++ bsfl %ecx, %ecx ++# ifdef USE_AS_WMEMCMP ++ movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax ++ xorl %edx, %edx ++ cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax ++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx ++ subl %ecx, %eax ++# endif + ret + + .p2align 4 +@@ -209,10 +240,11 @@ L(return_vec_0): + # endif + ret + +- /* NB: No p2align necessary. Alignment % 16 is naturally 1 +- which is good enough for a target not in a loop. */ ++ .p2align 4 + L(return_vec_1): +- tzcntl %eax, %eax ++ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_1) in one ++ fetch block. */ ++ bsfl %eax, %eax + # ifdef USE_AS_WMEMCMP + movl VEC_SIZE(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +@@ -226,10 +258,11 @@ L(return_vec_1): + # endif + ret + +- /* NB: No p2align necessary. Alignment % 16 is naturally 2 +- which is good enough for a target not in a loop. */ ++ .p2align 4,, 10 + L(return_vec_2): +- tzcntl %eax, %eax ++ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_2) in one ++ fetch block. */ ++ bsfl %eax, %eax + # ifdef USE_AS_WMEMCMP + movl (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +@@ -243,40 +276,6 @@ L(return_vec_2): + # endif + ret + +- .p2align 4 +-L(8x_return_vec_0_1_2_3): +- /* Returning from L(more_8x_vec) requires restoring rsi. */ +- addq %rdi, %rsi +-L(return_vec_0_1_2_3): +- VPCMP $4, %YMM1, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_0) +- +- VPCMP $4, %YMM2, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_1) +- +- VPCMP $4, %YMM3, %YMM0, %k0 +- kmovd %k0, %eax +- testl %eax, %eax +- jnz L(return_vec_2) +-L(return_vec_3): +- tzcntl %ecx, %ecx +-# ifdef USE_AS_WMEMCMP +- movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax +- xorl %edx, %edx +- cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax +- movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx +- subl %ecx, %eax +-# endif +- ret +- + .p2align 4 + L(more_8x_vec): + /* Set end of s1 in rdx. */ +@@ -288,21 +287,19 @@ L(more_8x_vec): + andq $-VEC_SIZE, %rdi + /* Adjust because first 4x vec where check already. */ + subq $-(VEC_SIZE * 4), %rdi ++ + .p2align 4 + L(loop_4x_vec): + VMOVU (%rsi, %rdi), %YMM1 + vpxorq (%rdi), %YMM1, %YMM1 +- + VMOVU VEC_SIZE(%rsi, %rdi), %YMM2 + vpxorq VEC_SIZE(%rdi), %YMM2, %YMM2 +- + VMOVU (VEC_SIZE * 2)(%rsi, %rdi), %YMM3 + vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3 +- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3 +- + VMOVU (VEC_SIZE * 3)(%rsi, %rdi), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4 +- VPCMP $4, %YMM4, %YMM0, %k1 ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 ++ VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jnz L(8x_return_vec_0_1_2_3) +@@ -319,28 +316,25 @@ L(loop_4x_vec): + cmpl $(VEC_SIZE * 2), %edi + jae L(8x_last_2x_vec) + ++ vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3 ++ + VMOVU (%rsi, %rdx), %YMM1 + vpxorq (%rdx), %YMM1, %YMM1 + + VMOVU VEC_SIZE(%rsi, %rdx), %YMM2 + vpxorq VEC_SIZE(%rdx), %YMM2, %YMM2 +- +- vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3 +- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3 +- + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM3, %YMM4 +- VPCMP $4, %YMM4, %YMM0, %k1 ++ vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 ++ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 ++ VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +- /* Restore s1 pointer to rdi. */ +- movq %rdx, %rdi + testl %ecx, %ecx +- jnz L(8x_return_vec_0_1_2_3) ++ jnz L(8x_end_return_vec_0_1_2_3) + /* NB: eax must be zero to reach here. */ + ret + + /* Only entry is from L(more_8x_vec). */ +- .p2align 4 ++ .p2align 4,, 10 + L(8x_last_2x_vec): + VPCMP $4, (VEC_SIZE * 2)(%rdx), %YMM3, %k1 + kmovd %k1, %eax +@@ -355,7 +349,31 @@ L(8x_last_1x_vec): + jnz L(8x_return_vec_3) + ret + +- .p2align 4 ++ /* Not ideally aligned (at offset +9 bytes in fetch block) but ++ not aligning keeps it in the same cache line as ++ L(8x_last_1x/2x_vec) so likely worth it. As well, saves code ++ size. */ ++ .p2align 4,, 4 ++L(8x_return_vec_2): ++ subq $VEC_SIZE, %rdx ++L(8x_return_vec_3): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ leaq (%rdx, %rax, CHAR_SIZE), %rax ++ movl (VEC_SIZE * 3)(%rax), %ecx ++ xorl %edx, %edx ++ cmpl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ addq %rdx, %rax ++ movzbl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ movzbl (VEC_SIZE * 3)(%rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4,, 10 + L(last_2x_vec): + /* Check second to last VEC. */ + VMOVU -(VEC_SIZE * 2)(%rsi, %rdx, CHAR_SIZE), %YMM1 +@@ -374,26 +392,49 @@ L(last_1x_vec): + jnz L(return_vec_0_end) + ret + +- .p2align 4 +-L(8x_return_vec_2): +- subq $VEC_SIZE, %rdx +-L(8x_return_vec_3): +- tzcntl %eax, %eax ++ .p2align 4,, 10 ++L(return_vec_1_end): ++ /* Use bsf to save code size. This is necessary to have ++ L(one_or_less) fit in aligning bytes between. */ ++ bsfl %eax, %eax ++ addl %edx, %eax + # ifdef USE_AS_WMEMCMP +- leaq (%rdx, %rax, CHAR_SIZE), %rax +- movl (VEC_SIZE * 3)(%rax), %ecx ++ movl -(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +- cmpl (VEC_SIZE * 3)(%rsi, %rax), %ecx ++ cmpl -(VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %ecx + setg %dl + leal -1(%rdx, %rdx), %eax + # else +- addq %rdx, %rax +- movzbl (VEC_SIZE * 3)(%rsi, %rax), %ecx +- movzbl (VEC_SIZE * 3)(%rax), %eax ++ movzbl -(VEC_SIZE * 2)(%rsi, %rax), %ecx ++ movzbl -(VEC_SIZE * 2)(%rdi, %rax), %eax + subl %ecx, %eax + # endif + ret + ++ /* NB: L(one_or_less) fits in alignment padding between ++ L(return_vec_1_end) and L(return_vec_0_end). */ ++# ifdef USE_AS_WMEMCMP ++L(one_or_less): ++ jb L(zero) ++ movl (%rdi), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi), %ecx ++ je L(zero) ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++ ret ++# else ++L(one_or_less): ++ jb L(zero) ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax ++ subl %ecx, %eax ++ ret ++# endif ++L(zero): ++ xorl %eax, %eax ++ ret ++ + .p2align 4 + L(return_vec_0_end): + tzcntl %eax, %eax +@@ -412,23 +453,56 @@ L(return_vec_0_end): + ret + + .p2align 4 +-L(return_vec_1_end): ++L(less_vec): ++ /* Check if one or less CHAR. This is necessary for size == 0 ++ but is also faster for size == CHAR_SIZE. */ ++ cmpl $1, %edx ++ jbe L(one_or_less) ++ ++ /* Check if loading one VEC from either s1 or s2 could cause a ++ page cross. This can have false positives but is by far the ++ fastest method. */ ++ movl %edi, %eax ++ orl %esi, %eax ++ andl $(PAGE_SIZE - 1), %eax ++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax ++ jg L(page_cross_less_vec) ++ ++ /* No page cross possible. */ ++ VMOVU (%rsi), %YMM2 ++ VPCMP $4, (%rdi), %YMM2, %k1 ++ kmovd %k1, %eax ++ /* Check if any matches where in bounds. Intentionally not ++ storing result in eax to limit dependency chain if it goes to ++ L(return_vec_0_lv). */ ++ bzhil %edx, %eax, %edx ++ jnz L(return_vec_0_lv) ++ xorl %eax, %eax ++ ret ++ ++ /* Essentially duplicate of L(return_vec_0). Ends up not costing ++ any code as shrinks L(less_vec) by allowing 2-byte encoding of ++ the jump and ends up fitting in aligning bytes. As well fits on ++ same cache line as L(less_vec) so also saves a line from having ++ to be fetched on cold calls to memcmp. */ ++ .p2align 4,, 4 ++L(return_vec_0_lv): + tzcntl %eax, %eax +- addl %edx, %eax + # ifdef USE_AS_WMEMCMP +- movl -(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx ++ movl (%rdi, %rax, CHAR_SIZE), %ecx + xorl %edx, %edx +- cmpl -(VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %ecx ++ cmpl (%rsi, %rax, CHAR_SIZE), %ecx ++ /* NB: no partial register stall here because xorl zero idiom ++ above. */ + setg %dl + leal -1(%rdx, %rdx), %eax + # else +- movzbl -(VEC_SIZE * 2)(%rsi, %rax), %ecx +- movzbl -(VEC_SIZE * 2)(%rdi, %rax), %eax ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax + subl %ecx, %eax + # endif + ret + +- + .p2align 4 + L(page_cross_less_vec): + /* if USE_AS_WMEMCMP it can only be 0, 4, 8, 12, 16, 20, 24, 28 +@@ -439,108 +513,84 @@ L(page_cross_less_vec): + cmpl $8, %edx + jae L(between_8_15) + cmpl $4, %edx +- jae L(between_4_7) +-L(between_2_3): +- /* Load as big endian to avoid branches. */ +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- /* Subtraction is okay because the upper 8 bits are zero. */ +- subl %ecx, %eax +- ret +- .p2align 4 +-L(one_or_less): +- jb L(zero) +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax ++ jb L(between_2_3) ++ ++ /* Load as big endian with overlapping movbe to avoid branches. ++ */ ++ movbe (%rdi), %eax ++ movbe (%rsi), %ecx ++ shlq $32, %rax ++ shlq $32, %rcx ++ movbe -4(%rdi, %rdx), %edi ++ movbe -4(%rsi, %rdx), %esi ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ /* edx is guranteed to be positive int32 in range [4, 7]. */ ++ cmovne %edx, %eax ++ /* ecx is -1 if rcx > rax. Otherwise 0. */ ++ sbbl %ecx, %ecx ++ /* If rcx > rax, then ecx is 0 and eax is positive. If rcx == ++ rax then eax and ecx are zero. If rax < rax then ecx is -1 so ++ eax doesn't matter. */ ++ orl %ecx, %eax + ret + +- .p2align 4 ++ .p2align 4,, 8 + L(between_8_15): + # endif + /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */ +- vmovq (%rdi), %XMM1 +- vmovq (%rsi), %XMM2 +- VPCMP $4, %XMM1, %XMM2, %k1 ++ vmovq (%rdi), %xmm1 ++ vmovq (%rsi), %xmm2 ++ VPCMP $4, %xmm1, %xmm2, %k1 + kmovd %k1, %eax + testl %eax, %eax +- jnz L(return_vec_0) ++ jnz L(return_vec_0_lv) + /* Use overlapping loads to avoid branches. */ +- leaq -8(%rdi, %rdx, CHAR_SIZE), %rdi +- leaq -8(%rsi, %rdx, CHAR_SIZE), %rsi +- vmovq (%rdi), %XMM1 +- vmovq (%rsi), %XMM2 +- VPCMP $4, %XMM1, %XMM2, %k1 ++ vmovq -8(%rdi, %rdx, CHAR_SIZE), %xmm1 ++ vmovq -8(%rsi, %rdx, CHAR_SIZE), %xmm2 ++ VPCMP $4, %xmm1, %xmm2, %k1 ++ addl $(CHAR_PER_VEC - (8 / CHAR_SIZE)), %edx + kmovd %k1, %eax + testl %eax, %eax +- jnz L(return_vec_0) +- ret +- +- .p2align 4 +-L(zero): +- xorl %eax, %eax ++ jnz L(return_vec_0_end) + ret + +- .p2align 4 ++ .p2align 4,, 8 + L(between_16_31): + /* From 16 to 31 bytes. No branch when size == 16. */ +- VMOVU (%rsi), %XMM2 +- VPCMP $4, (%rdi), %XMM2, %k1 ++ ++ /* Use movups to save code size. */ ++ movups (%rsi), %xmm2 ++ VPCMP $4, (%rdi), %xmm2, %k1 + kmovd %k1, %eax + testl %eax, %eax +- jnz L(return_vec_0) +- ++ jnz L(return_vec_0_lv) + /* Use overlapping loads to avoid branches. */ +- +- VMOVU -16(%rsi, %rdx, CHAR_SIZE), %XMM2 +- leaq -16(%rdi, %rdx, CHAR_SIZE), %rdi +- leaq -16(%rsi, %rdx, CHAR_SIZE), %rsi +- VPCMP $4, (%rdi), %XMM2, %k1 ++ movups -16(%rsi, %rdx, CHAR_SIZE), %xmm2 ++ VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1 ++ addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx + kmovd %k1, %eax + testl %eax, %eax +- jnz L(return_vec_0) +- ret +- +-# ifdef USE_AS_WMEMCMP +- .p2align 4 +-L(one_or_less): +- jb L(zero) +- movl (%rdi), %ecx +- xorl %edx, %edx +- cmpl (%rsi), %ecx +- je L(zero) +- setg %dl +- leal -1(%rdx, %rdx), %eax ++ jnz L(return_vec_0_end) + ret +-# else + +- .p2align 4 +-L(between_4_7): +- /* Load as big endian with overlapping movbe to avoid branches. +- */ +- movbe (%rdi), %eax +- movbe (%rsi), %ecx +- shlq $32, %rax +- shlq $32, %rcx +- movbe -4(%rdi, %rdx), %edi +- movbe -4(%rsi, %rdx), %esi +- orq %rdi, %rax +- orq %rsi, %rcx +- subq %rcx, %rax +- jz L(zero_4_7) +- sbbl %eax, %eax +- orl $1, %eax +-L(zero_4_7): ++# ifndef USE_AS_WMEMCMP ++L(between_2_3): ++ /* Load as big endian to avoid branches. */ ++ movzwl (%rdi), %eax ++ movzwl (%rsi), %ecx ++ shll $8, %eax ++ shll $8, %ecx ++ bswap %eax ++ bswap %ecx ++ movzbl -1(%rdi, %rdx), %edi ++ movzbl -1(%rsi, %rdx), %esi ++ orl %edi, %eax ++ orl %esi, %ecx ++ /* Subtraction is okay because the upper 8 bits are zero. */ ++ subl %ecx, %eax + ret + # endif +- + END (MEMCMP) + #endif diff --git a/glibc-upstream-2.34-176.patch b/glibc-upstream-2.34-176.patch new file mode 100644 index 0000000..74b18ab --- /dev/null +++ b/glibc-upstream-2.34-176.patch @@ -0,0 +1,497 @@ +commit 6d18a93dbbde2958001d65dff3080beed7ae675a +Author: Noah Goldstein +Date: Mon Sep 20 16:20:15 2021 -0500 + + x86: Optimize memset-vec-unaligned-erms.S + + No bug. + + Optimization are + + 1. change control flow for L(more_2x_vec) to fall through to loop and + jump for L(less_4x_vec) and L(less_8x_vec). This uses less code + size and saves jumps for length > 4x VEC_SIZE. + + 2. For EVEX/AVX512 move L(less_vec) closer to entry. + + 3. Avoid complex address mode for length > 2x VEC_SIZE + + 4. Slightly better aligning code for the loop from the perspective of + code size and uops. + + 5. Align targets so they make full use of their fetch block and if + possible cache line. + + 6. Try and reduce total number of icache lines that will need to be + pulled in for a given length. + + 7. Include "local" version of stosb target. For AVX2/EVEX/AVX512 + jumping to the stosb target in the sse2 code section will almost + certainly be to a new page. The new version does increase code size + marginally by duplicating the target but should get better iTLB + behavior as a result. + + test-memset, test-wmemset, and test-bzero are all passing. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit e59ced238482fd71f3e493717f14f6507346741e) + +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 7d4a327eba29ecb4..0137eba4cdd9f830 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -18,13 +18,15 @@ + . */ + + #include ++#define USE_WITH_SSE2 1 + + #define VEC_SIZE 16 ++#define MOV_SIZE 3 ++#define RET_SIZE 1 ++ + #define VEC(i) xmm##i +-/* Don't use movups and movaps since it will get larger nop paddings for +- alignment. */ +-#define VMOVU movdqu +-#define VMOVA movdqa ++#define VMOVU movups ++#define VMOVA movaps + + #define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + movd d, %xmm0; \ +diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +index ae0860f36a47d594..1af668af0aeda59e 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +@@ -1,8 +1,14 @@ + #if IS_IN (libc) ++# define USE_WITH_AVX2 1 ++ + # define VEC_SIZE 32 ++# define MOV_SIZE 4 ++# define RET_SIZE 4 ++ + # define VEC(i) ymm##i +-# define VMOVU vmovdqu +-# define VMOVA vmovdqa ++ ++# define VMOVU vmovdqu ++# define VMOVA vmovdqa + + # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + vmovd d, %xmm0; \ +diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +index 8ad842fc2f140527..f14d6f8493c21a36 100644 +--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +@@ -1,11 +1,18 @@ + #if IS_IN (libc) ++# define USE_WITH_AVX512 1 ++ + # define VEC_SIZE 64 ++# define MOV_SIZE 6 ++# define RET_SIZE 1 ++ + # define XMM0 xmm16 + # define YMM0 ymm16 + # define VEC0 zmm16 + # define VEC(i) VEC##i +-# define VMOVU vmovdqu64 +-# define VMOVA vmovdqa64 ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ + # define VZEROUPPER + + # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +index 640f092903302ad0..64b09e77cc20cc42 100644 +--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S +@@ -1,11 +1,18 @@ + #if IS_IN (libc) ++# define USE_WITH_EVEX 1 ++ + # define VEC_SIZE 32 ++# define MOV_SIZE 6 ++# define RET_SIZE 1 ++ + # define XMM0 xmm16 + # define YMM0 ymm16 + # define VEC0 ymm16 + # define VEC(i) VEC##i +-# define VMOVU vmovdqu64 +-# define VMOVA vmovdqa64 ++ ++# define VMOVU vmovdqu64 ++# define VMOVA vmovdqa64 ++ + # define VZEROUPPER + + # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index ff196844a093dc3b..e723413a664c088f 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -63,8 +63,27 @@ + # endif + #endif + ++#if VEC_SIZE == 64 ++# define LOOP_4X_OFFSET (VEC_SIZE * 4) ++#else ++# define LOOP_4X_OFFSET (0) ++#endif ++ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++# define END_REG rcx ++# define LOOP_REG rdi ++#else ++# define END_REG rdi ++# define LOOP_REG rdx ++#endif ++ + #define PAGE_SIZE 4096 + ++/* Macro to calculate size of small memset block for aligning ++ purposes. */ ++#define SMALL_MEMSET_ALIGN(mov_sz, ret_sz) (2 * (mov_sz) + (ret_sz) + 1) ++ ++ + #ifndef SECTION + # error SECTION is not defined! + #endif +@@ -74,6 +93,7 @@ + ENTRY (__bzero) + mov %RDI_LP, %RAX_LP /* Set return value. */ + mov %RSI_LP, %RDX_LP /* Set n. */ ++ xorl %esi, %esi + pxor %XMM0, %XMM0 + jmp L(entry_from_bzero) + END (__bzero) +@@ -158,7 +178,7 @@ ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) + # endif + +-ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) ++ENTRY_P2ALIGN (MEMSET_SYMBOL (__memset, unaligned_erms), 6) + MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) + # ifdef __ILP32__ + /* Clear the upper 32 bits. */ +@@ -168,75 +188,43 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) + jb L(less_vec) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(stosb_more_2x_vec) +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(0), (%rdi) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. ++ */ ++ VMOVU %VEC(0), (%rax) ++ VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx) + VZEROUPPER_RETURN +- +- .p2align 4 +-L(stosb_more_2x_vec): +- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP +- ja L(stosb) +-#else +- .p2align 4 + #endif +-L(more_2x_vec): +- /* Stores to first 2x VEC before cmp as any path forward will +- require it. */ +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(0), VEC_SIZE(%rdi) +- cmpq $(VEC_SIZE * 4), %rdx +- ja L(loop_start) +- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) +- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) +-L(return): +-#if VEC_SIZE > 16 +- ZERO_UPPER_VEC_REGISTERS_RETURN ++ ++ .p2align 4,, 10 ++L(last_2x_vec): ++#ifdef USE_LESS_VEC_MASK_STORE ++ VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%rcx) ++ VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%rcx) + #else +- ret ++ VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi) ++ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi) + #endif ++ VZEROUPPER_RETURN + +-L(loop_start): +- VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi) +- VMOVU %VEC(0), (VEC_SIZE * 3)(%rdi) +- cmpq $(VEC_SIZE * 8), %rdx +- jbe L(loop_end) +- andq $-(VEC_SIZE * 2), %rdi +- subq $-(VEC_SIZE * 4), %rdi +- leaq -(VEC_SIZE * 4)(%rax, %rdx), %rcx +- .p2align 4 +-L(loop): +- VMOVA %VEC(0), (%rdi) +- VMOVA %VEC(0), VEC_SIZE(%rdi) +- VMOVA %VEC(0), (VEC_SIZE * 2)(%rdi) +- VMOVA %VEC(0), (VEC_SIZE * 3)(%rdi) +- subq $-(VEC_SIZE * 4), %rdi +- cmpq %rcx, %rdi +- jb L(loop) +-L(loop_end): +- /* NB: rax is set as ptr in MEMSET_VDUP_TO_VEC0_AND_SET_RETURN. +- rdx as length is also unchanged. */ +- VMOVU %VEC(0), -(VEC_SIZE * 4)(%rax, %rdx) +- VMOVU %VEC(0), -(VEC_SIZE * 3)(%rax, %rdx) +- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rax, %rdx) +- VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx) +- VZEROUPPER_SHORT_RETURN +- +- .p2align 4 ++ /* If have AVX512 mask instructions put L(less_vec) close to ++ entry as it doesn't take much space and is likely a hot target. ++ */ ++#ifdef USE_LESS_VEC_MASK_STORE ++ .p2align 4,, 10 + L(less_vec): + /* Less than 1 VEC. */ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! + # endif +-# ifdef USE_LESS_VEC_MASK_STORE + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. Note that we are using rax which is set in +- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. +- */ ++ MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ + andl $(PAGE_SIZE - 1), %edi +- /* Check if VEC_SIZE store cross page. Mask stores suffer serious +- performance degradation when it has to fault supress. */ ++ /* Check if VEC_SIZE store cross page. Mask stores suffer ++ serious performance degradation when it has to fault supress. ++ */ + cmpl $(PAGE_SIZE - VEC_SIZE), %edi ++ /* This is generally considered a cold target. */ + ja L(cross_page) + # if VEC_SIZE > 32 + movq $-1, %rcx +@@ -247,58 +235,185 @@ L(less_vec): + bzhil %edx, %ecx, %ecx + kmovd %ecx, %k1 + # endif +- vmovdqu8 %VEC(0), (%rax) {%k1} ++ vmovdqu8 %VEC(0), (%rax){%k1} + VZEROUPPER_RETURN + ++# if defined USE_MULTIARCH && IS_IN (libc) ++ /* Include L(stosb_local) here if including L(less_vec) between ++ L(stosb_more_2x_vec) and ENTRY. This is to cache align the ++ L(stosb_more_2x_vec) target. */ ++ .p2align 4,, 10 ++L(stosb_local): ++ movzbl %sil, %eax ++ mov %RDX_LP, %RCX_LP ++ mov %RDI_LP, %RDX_LP ++ rep stosb ++ mov %RDX_LP, %RAX_LP ++ VZEROUPPER_RETURN ++# endif ++#endif ++ ++#if defined USE_MULTIARCH && IS_IN (libc) + .p2align 4 +-L(cross_page): ++L(stosb_more_2x_vec): ++ cmp __x86_rep_stosb_threshold(%rip), %RDX_LP ++ ja L(stosb_local) ++#endif ++ /* Fallthrough goes to L(loop_4x_vec). Tests for memset (2x, 4x] ++ and (4x, 8x] jump to target. */ ++L(more_2x_vec): ++ ++ /* Two different methods of setting up pointers / compare. The ++ two methods are based on the fact that EVEX/AVX512 mov ++ instructions take more bytes then AVX2/SSE2 mov instructions. As ++ well that EVEX/AVX512 machines also have fast LEA_BID. Both ++ setup and END_REG to avoid complex address mode. For EVEX/AVX512 ++ this saves code size and keeps a few targets in one fetch block. ++ For AVX2/SSE2 this helps prevent AGU bottlenecks. */ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++ /* If EVEX/AVX512 compute END_REG - (VEC_SIZE * 4 + ++ LOOP_4X_OFFSET) with LEA_BID. */ ++ ++ /* END_REG is rcx for EVEX/AVX512. */ ++ leaq -(VEC_SIZE * 4 + LOOP_4X_OFFSET)(%rdi, %rdx), %END_REG ++#endif ++ ++ /* Stores to first 2x VEC before cmp as any path forward will ++ require it. */ ++ VMOVU %VEC(0), (%rax) ++ VMOVU %VEC(0), VEC_SIZE(%rax) ++ ++ ++#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512) ++ /* If AVX2/SSE2 compute END_REG (rdi) with ALU. */ ++ addq %rdx, %END_REG ++#endif ++ ++ cmpq $(VEC_SIZE * 4), %rdx ++ jbe L(last_2x_vec) ++ ++ /* Store next 2x vec regardless. */ ++ VMOVU %VEC(0), (VEC_SIZE * 2)(%rax) ++ VMOVU %VEC(0), (VEC_SIZE * 3)(%rax) ++ ++ ++#if defined USE_WITH_EVEX || defined USE_WITH_AVX512 ++ /* If LOOP_4X_OFFSET don't readjust LOOP_REG (rdi), just add ++ extra offset to addresses in loop. Used for AVX512 to save space ++ as no way to get (VEC_SIZE * 4) in imm8. */ ++# if LOOP_4X_OFFSET == 0 ++ subq $-(VEC_SIZE * 4), %LOOP_REG + # endif +-# if VEC_SIZE > 32 +- cmpb $32, %dl +- jae L(between_32_63) ++ /* Avoid imm32 compare here to save code size. */ ++ cmpq %rdi, %rcx ++#else ++ addq $-(VEC_SIZE * 4), %END_REG ++ cmpq $(VEC_SIZE * 8), %rdx ++#endif ++ jbe L(last_4x_vec) ++#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512) ++ /* Set LOOP_REG (rdx). */ ++ leaq (VEC_SIZE * 4)(%rax), %LOOP_REG ++#endif ++ /* Align dst for loop. */ ++ andq $(VEC_SIZE * -2), %LOOP_REG ++ .p2align 4 ++L(loop): ++ VMOVA %VEC(0), LOOP_4X_OFFSET(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%LOOP_REG) ++ VMOVA %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%LOOP_REG) ++ subq $-(VEC_SIZE * 4), %LOOP_REG ++ cmpq %END_REG, %LOOP_REG ++ jb L(loop) ++ .p2align 4,, MOV_SIZE ++L(last_4x_vec): ++ VMOVU %VEC(0), LOOP_4X_OFFSET(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%END_REG) ++ VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%END_REG) ++L(return): ++#if VEC_SIZE > 16 ++ ZERO_UPPER_VEC_REGISTERS_RETURN ++#else ++ ret ++#endif ++ ++ .p2align 4,, 10 ++#ifndef USE_LESS_VEC_MASK_STORE ++# if defined USE_MULTIARCH && IS_IN (libc) ++ /* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in ++ range for 2-byte jump encoding. */ ++L(stosb_local): ++ movzbl %sil, %eax ++ mov %RDX_LP, %RCX_LP ++ mov %RDI_LP, %RDX_LP ++ rep stosb ++ mov %RDX_LP, %RAX_LP ++ VZEROUPPER_RETURN + # endif +-# if VEC_SIZE > 16 +- cmpb $16, %dl ++ /* Define L(less_vec) only if not otherwise defined. */ ++ .p2align 4 ++L(less_vec): ++#endif ++L(cross_page): ++#if VEC_SIZE > 32 ++ cmpl $32, %edx ++ jae L(between_32_63) ++#endif ++#if VEC_SIZE > 16 ++ cmpl $16, %edx + jae L(between_16_31) +-# endif +- MOVQ %XMM0, %rcx +- cmpb $8, %dl ++#endif ++ MOVQ %XMM0, %rdi ++ cmpl $8, %edx + jae L(between_8_15) +- cmpb $4, %dl ++ cmpl $4, %edx + jae L(between_4_7) +- cmpb $1, %dl ++ cmpl $1, %edx + ja L(between_2_3) +- jb 1f +- movb %cl, (%rax) +-1: ++ jb L(return) ++ movb %sil, (%rax) + VZEROUPPER_RETURN +-# if VEC_SIZE > 32 ++ ++ /* Align small targets only if not doing so would cross a fetch ++ line. */ ++#if VEC_SIZE > 32 ++ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) + /* From 32 to 63. No branch when size == 32. */ + L(between_32_63): +- VMOVU %YMM0, -32(%rax,%rdx) + VMOVU %YMM0, (%rax) ++ VMOVU %YMM0, -32(%rax, %rdx) + VZEROUPPER_RETURN +-# endif +-# if VEC_SIZE > 16 +- /* From 16 to 31. No branch when size == 16. */ ++#endif ++ ++#if VEC_SIZE >= 32 ++ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE) + L(between_16_31): +- VMOVU %XMM0, -16(%rax,%rdx) ++ /* From 16 to 31. No branch when size == 16. */ + VMOVU %XMM0, (%rax) ++ VMOVU %XMM0, -16(%rax, %rdx) + VZEROUPPER_RETURN +-# endif +- /* From 8 to 15. No branch when size == 8. */ ++#endif ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) + L(between_8_15): +- movq %rcx, -8(%rax,%rdx) +- movq %rcx, (%rax) ++ /* From 8 to 15. No branch when size == 8. */ ++ movq %rdi, (%rax) ++ movq %rdi, -8(%rax, %rdx) + VZEROUPPER_RETURN ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(2, RET_SIZE) + L(between_4_7): + /* From 4 to 7. No branch when size == 4. */ +- movl %ecx, -4(%rax,%rdx) +- movl %ecx, (%rax) ++ movl %edi, (%rax) ++ movl %edi, -4(%rax, %rdx) + VZEROUPPER_RETURN ++ ++ .p2align 4,, SMALL_MEMSET_ALIGN(3, RET_SIZE) + L(between_2_3): + /* From 2 to 3. No branch when size == 2. */ +- movw %cx, -2(%rax,%rdx) +- movw %cx, (%rax) ++ movw %di, (%rax) ++ movb %dil, -1(%rax, %rdx) + VZEROUPPER_RETURN + END (MEMSET_SYMBOL (__memset, unaligned_erms)) diff --git a/glibc-upstream-2.34-177.patch b/glibc-upstream-2.34-177.patch new file mode 100644 index 0000000..112bcad --- /dev/null +++ b/glibc-upstream-2.34-177.patch @@ -0,0 +1,40 @@ +commit baf3ece63453adac59c5688930324a78ced5b2e4 +Author: Noah Goldstein +Date: Sat Oct 23 01:26:47 2021 -0400 + + x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S + + This commit replaces two usages of SSE2 'movups' with AVX 'vmovdqu'. + + it could potentially be dangerous to use SSE2 if this function is ever + called without using 'vzeroupper' beforehand. While compilers appear + to use 'vzeroupper' before function calls if AVX2 has been used, using + SSE2 here is more brittle. Since it is not absolutely necessary it + should be avoided. + + It costs 2-extra bytes but the extra bytes should only eat into + alignment padding. + Reviewed-by: H.J. Lu + + (cherry picked from commit bad852b61b79503fcb3c5fc379c70f768df3e1fb) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +index 2761b54f2e7dea9f..640f6757fac8a356 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +@@ -561,13 +561,13 @@ L(between_16_31): + /* From 16 to 31 bytes. No branch when size == 16. */ + + /* Use movups to save code size. */ +- movups (%rsi), %xmm2 ++ vmovdqu (%rsi), %xmm2 + VPCMP $4, (%rdi), %xmm2, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(return_vec_0_lv) + /* Use overlapping loads to avoid branches. */ +- movups -16(%rsi, %rdx, CHAR_SIZE), %xmm2 ++ vmovdqu -16(%rsi, %rdx, CHAR_SIZE), %xmm2 + VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1 + addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx + kmovd %k1, %eax diff --git a/glibc-upstream-2.34-178.patch b/glibc-upstream-2.34-178.patch new file mode 100644 index 0000000..1540e2f --- /dev/null +++ b/glibc-upstream-2.34-178.patch @@ -0,0 +1,690 @@ +commit f35ad30da4880a1574996df0674986ecf82fa7ae +Author: H.J. Lu +Date: Fri Oct 29 12:40:20 2021 -0700 + + x86-64: Improve EVEX strcmp with masked load + + In strcmp-evex.S, to compare 2 32-byte strings, replace + + VMOVU (%rdi, %rdx), %YMM0 + VMOVU (%rsi, %rdx), %YMM1 + /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ + VPCMP $4, %YMM0, %YMM1, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM1, %k2 + /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jne L(last_vector) + + with + + VMOVU (%rdi, %rdx), %YMM0 + VPTESTM %YMM0, %YMM0, %k2 + /* Each bit cleared in K1 represents a mismatch or a null CHAR + in YMM0 and 32 bytes at (%rsi, %rdx). */ + VPCMP $0, (%rsi, %rdx), %YMM0, %k1{%k2} + kmovd %k1, %ecx + incl %ecx + jne L(last_vector) + + It makes EVEX strcmp faster than AVX2 strcmp by up to 40% on Tiger Lake + and Ice Lake. + + Co-Authored-By: Noah Goldstein + (cherry picked from commit c46e9afb2df5fc9e39ff4d13777e4b4c26e04e55) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index d5aa6daa46c7ed25..82f12ac89bcae20b 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -41,6 +41,8 @@ + # ifdef USE_AS_WCSCMP + /* Compare packed dwords. */ + # define VPCMP vpcmpd ++# define VPMINU vpminud ++# define VPTESTM vptestmd + # define SHIFT_REG32 r8d + # define SHIFT_REG64 r8 + /* 1 dword char == 4 bytes. */ +@@ -48,6 +50,8 @@ + # else + /* Compare packed bytes. */ + # define VPCMP vpcmpb ++# define VPMINU vpminub ++# define VPTESTM vptestmb + # define SHIFT_REG32 ecx + # define SHIFT_REG64 rcx + /* 1 byte char == 1 byte. */ +@@ -67,6 +71,9 @@ + # define YMM5 ymm22 + # define YMM6 ymm23 + # define YMM7 ymm24 ++# define YMM8 ymm25 ++# define YMM9 ymm26 ++# define YMM10 ymm27 + + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. +@@ -76,7 +83,7 @@ + /* The main idea of the string comparison (byte or dword) using 256-bit + EVEX instructions consists of comparing (VPCMP) two ymm vectors. The + latter can be on either packed bytes or dwords depending on +- USE_AS_WCSCMP. In order to check the null char, algorithm keeps the ++ USE_AS_WCSCMP. In order to check the null CHAR, algorithm keeps the + matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2 + KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes) + are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd +@@ -123,27 +130,21 @@ ENTRY (STRCMP) + jg L(cross_page) + /* Start comparing 4 vectors. */ + VMOVU (%rdi), %YMM0 +- VMOVU (%rsi), %YMM1 + +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 + +- /* Check for NULL in YMM0. */ +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- /* Check for NULL in YMM1. */ +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (%rsi). */ ++ VPCMP $0, (%rsi), %YMM0, %k1{%k2} + +- /* Each bit in K1 represents: +- 1. A mismatch in YMM0 and YMM1. Or +- 2. A NULL in YMM0 or YMM1. +- */ +- kord %k0, %k1, %k1 +- +- ktestd %k1, %k1 +- je L(next_3_vectors) + kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ je L(next_3_vectors) + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -172,9 +173,7 @@ L(return): + # endif + ret + +- .p2align 4 + L(return_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -210,9 +209,7 @@ L(return_vec_size): + # endif + ret + +- .p2align 4 + L(return_2_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -248,9 +245,7 @@ L(return_2_vec_size): + # endif + ret + +- .p2align 4 + L(return_3_vec_size): +- kmovd %k1, %ecx + tzcntl %ecx, %edx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -289,43 +284,45 @@ L(return_3_vec_size): + .p2align 4 + L(next_3_vectors): + VMOVU VEC_SIZE(%rdi), %YMM0 +- VMOVU VEC_SIZE(%rsi), %YMM1 +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at VEC_SIZE(%rsi). */ ++ VPCMP $0, VEC_SIZE(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_vec_size) + +- VMOVU (VEC_SIZE * 2)(%rdi), %YMM2 +- VMOVU (VEC_SIZE * 3)(%rdi), %YMM3 +- VMOVU (VEC_SIZE * 2)(%rsi), %YMM4 +- VMOVU (VEC_SIZE * 3)(%rsi), %YMM5 +- +- /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */ +- VPCMP $4, %YMM2, %YMM4, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM4, %k2 +- /* Each bit in K1 represents a NULL in YMM2 or YMM4. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ VMOVU (VEC_SIZE * 2)(%rdi), %YMM0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ ++ VPCMP $0, (VEC_SIZE * 2)(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_2_vec_size) + +- /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */ +- VPCMP $4, %YMM3, %YMM5, %k0 +- VPCMP $0, %YMMZERO, %YMM3, %k1 +- VPCMP $0, %YMMZERO, %YMM5, %k2 +- /* Each bit in K1 represents a NULL in YMM3 or YMM5. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- ktestd %k1, %k1 ++ VMOVU (VEC_SIZE * 3)(%rdi), %YMM0 ++ /* Each bit set in K2 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rsi). */ ++ VPCMP $0, (VEC_SIZE * 3)(%rsi), %YMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(return_3_vec_size) + L(main_loop_header): + leaq (VEC_SIZE * 4)(%rdi), %rdx +@@ -375,56 +372,51 @@ L(back_to_loop): + VMOVA VEC_SIZE(%rax), %YMM2 + VMOVA (VEC_SIZE * 2)(%rax), %YMM4 + VMOVA (VEC_SIZE * 3)(%rax), %YMM6 +- VMOVU (%rdx), %YMM1 +- VMOVU VEC_SIZE(%rdx), %YMM3 +- VMOVU (VEC_SIZE * 2)(%rdx), %YMM5 +- VMOVU (VEC_SIZE * 3)(%rdx), %YMM7 +- +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K4 represents a NULL or a mismatch in YMM0 and +- YMM1. */ +- kord %k0, %k1, %k4 +- +- VPCMP $4, %YMM2, %YMM3, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM3, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K5 represents a NULL or a mismatch in YMM2 and +- YMM3. */ +- kord %k0, %k1, %k5 +- +- VPCMP $4, %YMM4, %YMM5, %k0 +- VPCMP $0, %YMMZERO, %YMM4, %k1 +- VPCMP $0, %YMMZERO, %YMM5, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K6 represents a NULL or a mismatch in YMM4 and +- YMM5. */ +- kord %k0, %k1, %k6 +- +- VPCMP $4, %YMM6, %YMM7, %k0 +- VPCMP $0, %YMMZERO, %YMM6, %k1 +- VPCMP $0, %YMMZERO, %YMM7, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K7 represents a NULL or a mismatch in YMM6 and +- YMM7. */ +- kord %k0, %k1, %k7 +- +- kord %k4, %k5, %k0 +- kord %k6, %k7, %k1 +- +- /* Test each mask (32 bits) individually because for VEC_SIZE +- == 32 is not possible to OR the four masks and keep all bits +- in a 64-bit integer register, differing from SSE2 strcmp +- where ORing is possible. */ +- kortestd %k0, %k1 +- je L(loop) +- ktestd %k4, %k4 ++ ++ VPMINU %YMM0, %YMM2, %YMM8 ++ VPMINU %YMM4, %YMM6, %YMM9 ++ ++ /* A zero CHAR in YMM8 means that there is a null CHAR. */ ++ VPMINU %YMM8, %YMM9, %YMM8 ++ ++ /* Each bit set in K1 represents a non-null CHAR in YMM8. */ ++ VPTESTM %YMM8, %YMM8, %k1 ++ ++ /* (YMM ^ YMM): A non-zero CHAR represents a mismatch. */ ++ vpxorq (%rdx), %YMM0, %YMM1 ++ vpxorq VEC_SIZE(%rdx), %YMM2, %YMM3 ++ vpxorq (VEC_SIZE * 2)(%rdx), %YMM4, %YMM5 ++ vpxorq (VEC_SIZE * 3)(%rdx), %YMM6, %YMM7 ++ ++ vporq %YMM1, %YMM3, %YMM9 ++ vporq %YMM5, %YMM7, %YMM10 ++ ++ /* A non-zero CHAR in YMM9 represents a mismatch. */ ++ vporq %YMM9, %YMM10, %YMM9 ++ ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR. */ ++ VPCMP $0, %YMMZERO, %YMM9, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ je L(loop) ++ ++ /* Each bit set in K1 represents a non-null CHAR in YMM0. */ ++ VPTESTM %YMM0, %YMM0, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM0 and (%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM1, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_vec) +- kmovd %k4, %edi +- tzcntl %edi, %ecx ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +@@ -466,9 +458,18 @@ L(test_vec): + cmpq $VEC_SIZE, %r11 + jbe L(zero) + # endif +- ktestd %k5, %k5 ++ /* Each bit set in K1 represents a non-null CHAR in YMM2. */ ++ VPTESTM %YMM2, %YMM2, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM2 and VEC_SIZE(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM3, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_2_vec) +- kmovd %k5, %ecx + tzcntl %ecx, %edi + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -512,9 +513,18 @@ L(test_2_vec): + cmpq $(VEC_SIZE * 2), %r11 + jbe L(zero) + # endif +- ktestd %k6, %k6 ++ /* Each bit set in K1 represents a non-null CHAR in YMM4. */ ++ VPTESTM %YMM4, %YMM4, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM4 and (VEC_SIZE * 2)(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM5, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + je L(test_3_vec) +- kmovd %k6, %ecx + tzcntl %ecx, %edi + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ +@@ -558,8 +568,18 @@ L(test_3_vec): + cmpq $(VEC_SIZE * 3), %r11 + jbe L(zero) + # endif +- kmovd %k7, %esi +- tzcntl %esi, %ecx ++ /* Each bit set in K1 represents a non-null CHAR in YMM6. */ ++ VPTESTM %YMM6, %YMM6, %k1 ++ /* Each bit cleared in K0 represents a mismatch or a null CHAR ++ in YMM6 and (VEC_SIZE * 3)(%rdx). */ ++ VPCMP $0, %YMMZERO, %YMM7, %k0{%k1} ++ kmovd %k0, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif ++ tzcntl %ecx, %ecx + # ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +@@ -615,39 +635,51 @@ L(loop_cross_page): + + VMOVU (%rax, %r10), %YMM2 + VMOVU VEC_SIZE(%rax, %r10), %YMM3 +- VMOVU (%rdx, %r10), %YMM4 +- VMOVU VEC_SIZE(%rdx, %r10), %YMM5 +- +- VPCMP $4, %YMM4, %YMM2, %k0 +- VPCMP $0, %YMMZERO, %YMM2, %k1 +- VPCMP $0, %YMMZERO, %YMM4, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch in YMM2 and +- YMM4. */ +- kord %k0, %k1, %k1 +- +- VPCMP $4, %YMM5, %YMM3, %k3 +- VPCMP $0, %YMMZERO, %YMM3, %k4 +- VPCMP $0, %YMMZERO, %YMM5, %k5 +- kord %k4, %k5, %k4 +- /* Each bit in K3 represents a NULL or a mismatch in YMM3 and +- YMM5. */ +- kord %k3, %k4, %k3 ++ ++ /* Each bit set in K2 represents a non-null CHAR in YMM2. */ ++ VPTESTM %YMM2, %YMM2, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM2 and 32 bytes at (%rdx, %r10). */ ++ VPCMP $0, (%rdx, %r10), %YMM2, %k1{%k2} ++ kmovd %k1, %r9d ++ /* Don't use subl since it is the lower 16/32 bits of RDI ++ below. */ ++ notl %r9d ++# ifdef USE_AS_WCSCMP ++ /* Only last 8 bits are valid. */ ++ andl $0xff, %r9d ++# endif ++ ++ /* Each bit set in K4 represents a non-null CHAR in YMM3. */ ++ VPTESTM %YMM3, %YMM3, %k4 ++ /* Each bit cleared in K3 represents a mismatch or a null CHAR ++ in YMM3 and 32 bytes at VEC_SIZE(%rdx, %r10). */ ++ VPCMP $0, VEC_SIZE(%rdx, %r10), %YMM3, %k3{%k4} ++ kmovd %k3, %edi ++# ifdef USE_AS_WCSCMP ++ /* Don't use subl since it is the upper 8 bits of EDI below. */ ++ notl %edi ++ andl $0xff, %edi ++# else ++ incl %edi ++# endif + + # ifdef USE_AS_WCSCMP +- /* NB: Each bit in K1/K3 represents 4-byte element. */ +- kshiftlw $8, %k3, %k2 ++ /* NB: Each bit in EDI/R9D represents 4-byte element. */ ++ sall $8, %edi + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG32 + sarl $2, %SHIFT_REG32 ++ ++ /* Each bit in EDI represents a null CHAR or a mismatch. */ ++ orl %r9d, %edi + # else +- kshiftlq $32, %k3, %k2 +-# endif ++ salq $32, %rdi + +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korq %k1, %k2, %k1 +- kmovq %k1, %rdi ++ /* Each bit in RDI represents a null CHAR or a mismatch. */ ++ orq %r9, %rdi ++# endif + + /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ + shrxq %SHIFT_REG64, %rdi, %rdi +@@ -692,35 +724,45 @@ L(loop_cross_page_2_vec): + /* The first VEC_SIZE * 2 bytes match or are ignored. */ + VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 + VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 +- VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2 +- VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3 +- +- VPCMP $4, %YMM0, %YMM2, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM2, %k2 +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch in YMM0 and +- YMM2. */ +- kord %k0, %k1, %k1 +- +- VPCMP $4, %YMM1, %YMM3, %k3 +- VPCMP $0, %YMMZERO, %YMM1, %k4 +- VPCMP $0, %YMMZERO, %YMM3, %k5 +- kord %k4, %k5, %k4 +- /* Each bit in K3 represents a NULL or a mismatch in YMM1 and +- YMM3. */ +- kord %k3, %k4, %k3 + ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (VEC_SIZE * 2)(%rdx, %r10). */ ++ VPCMP $0, (VEC_SIZE * 2)(%rdx, %r10), %YMM0, %k1{%k2} ++ kmovd %k1, %r9d ++ /* Don't use subl since it is the lower 16/32 bits of RDI ++ below. */ ++ notl %r9d + # ifdef USE_AS_WCSCMP +- /* NB: Each bit in K1/K3 represents 4-byte element. */ +- kshiftlw $8, %k3, %k2 ++ /* Only last 8 bits are valid. */ ++ andl $0xff, %r9d ++# endif ++ ++ VPTESTM %YMM1, %YMM1, %k4 ++ /* Each bit cleared in K3 represents a mismatch or a null CHAR ++ in YMM1 and 32 bytes at (VEC_SIZE * 3)(%rdx, %r10). */ ++ VPCMP $0, (VEC_SIZE * 3)(%rdx, %r10), %YMM1, %k3{%k4} ++ kmovd %k3, %edi ++# ifdef USE_AS_WCSCMP ++ /* Don't use subl since it is the upper 8 bits of EDI below. */ ++ notl %edi ++ andl $0xff, %edi + # else +- kshiftlq $32, %k3, %k2 ++ incl %edi + # endif + +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korq %k1, %k2, %k1 +- kmovq %k1, %rdi ++# ifdef USE_AS_WCSCMP ++ /* NB: Each bit in EDI/R9D represents 4-byte element. */ ++ sall $8, %edi ++ ++ /* Each bit in EDI represents a null CHAR or a mismatch. */ ++ orl %r9d, %edi ++# else ++ salq $32, %rdi ++ ++ /* Each bit in RDI represents a null CHAR or a mismatch. */ ++ orq %r9, %rdi ++# endif + + xorl %r8d, %r8d + /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ +@@ -729,12 +771,15 @@ L(loop_cross_page_2_vec): + /* R8 has number of bytes skipped. */ + movl %ecx, %r8d + # ifdef USE_AS_WCSCMP +- /* NB: Divide shift count by 4 since each bit in K1 represent 4 ++ /* NB: Divide shift count by 4 since each bit in RDI represent 4 + bytes. */ + sarl $2, %ecx +-# endif ++ /* Skip ECX bytes. */ ++ shrl %cl, %edi ++# else + /* Skip ECX bytes. */ + shrq %cl, %rdi ++# endif + 1: + /* Before jumping back to the loop, set ESI to the number of + VEC_SIZE * 4 blocks before page crossing. */ +@@ -818,7 +863,7 @@ L(cross_page_loop): + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx + # endif +- /* Check null char. */ ++ /* Check null CHAR. */ + testl %eax, %eax + jne L(cross_page_loop) + /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED +@@ -901,18 +946,17 @@ L(cross_page): + jg L(cross_page_1_vector) + L(loop_1_vector): + VMOVU (%rdi, %rdx), %YMM0 +- VMOVU (%rsi, %rdx), %YMM1 +- +- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ +- VPCMP $4, %YMM0, %YMM1, %k0 +- VPCMP $0, %YMMZERO, %YMM0, %k1 +- VPCMP $0, %YMMZERO, %YMM1, %k2 +- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 ++ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in YMM0 and 32 bytes at (%rsi, %rdx). */ ++ VPCMP $0, (%rsi, %rdx), %YMM0, %k1{%k2} + kmovd %k1, %ecx +- testl %ecx, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xff, %ecx ++# else ++ incl %ecx ++# endif + jne L(last_vector) + + addl $VEC_SIZE, %edx +@@ -931,18 +975,17 @@ L(cross_page_1_vector): + cmpl $(PAGE_SIZE - 16), %eax + jg L(cross_page_1_xmm) + VMOVU (%rdi, %rdx), %XMM0 +- VMOVU (%rsi, %rdx), %XMM1 +- +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- korw %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- korw %k0, %k1, %k1 +- kmovw %k1, %ecx +- testl %ecx, %ecx ++ ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and 16 bytes at (%rsi, %rdx). */ ++ VPCMP $0, (%rsi, %rdx), %XMM0, %k1{%k2} ++ kmovd %k1, %ecx ++# ifdef USE_AS_WCSCMP ++ subl $0xf, %ecx ++# else ++ subl $0xffff, %ecx ++# endif + jne L(last_vector) + + addl $16, %edx +@@ -965,25 +1008,16 @@ L(cross_page_1_xmm): + vmovq (%rdi, %rdx), %XMM0 + vmovq (%rsi, %rdx), %XMM1 + +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 +- kmovd %k1, %ecx +- ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and XMM1. */ ++ VPCMP $0, %XMM1, %XMM0, %k1{%k2} ++ kmovb %k1, %ecx + # ifdef USE_AS_WCSCMP +- /* Only last 2 bits are valid. */ +- andl $0x3, %ecx ++ subl $0x3, %ecx + # else +- /* Only last 8 bits are valid. */ +- andl $0xff, %ecx ++ subl $0xff, %ecx + # endif +- +- testl %ecx, %ecx + jne L(last_vector) + + addl $8, %edx +@@ -1002,25 +1036,16 @@ L(cross_page_8bytes): + vmovd (%rdi, %rdx), %XMM0 + vmovd (%rsi, %rdx), %XMM1 + +- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ +- VPCMP $4, %XMM0, %XMM1, %k0 +- VPCMP $0, %XMMZERO, %XMM0, %k1 +- VPCMP $0, %XMMZERO, %XMM1, %k2 +- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ +- kord %k1, %k2, %k1 +- /* Each bit in K1 represents a NULL or a mismatch. */ +- kord %k0, %k1, %k1 ++ VPTESTM %YMM0, %YMM0, %k2 ++ /* Each bit cleared in K1 represents a mismatch or a null CHAR ++ in XMM0 and XMM1. */ ++ VPCMP $0, %XMM1, %XMM0, %k1{%k2} + kmovd %k1, %ecx +- + # ifdef USE_AS_WCSCMP +- /* Only the last bit is valid. */ +- andl $0x1, %ecx ++ subl $0x1, %ecx + # else +- /* Only last 4 bits are valid. */ +- andl $0xf, %ecx ++ subl $0xf, %ecx + # endif +- +- testl %ecx, %ecx + jne L(last_vector) + + addl $4, %edx diff --git a/glibc-upstream-2.34-179.patch b/glibc-upstream-2.34-179.patch new file mode 100644 index 0000000..e9a4329 --- /dev/null +++ b/glibc-upstream-2.34-179.patch @@ -0,0 +1,85 @@ +commit a182bb7a3922404f79def09d79ef89678b4049f0 +Author: H.J. Lu +Date: Fri Oct 29 12:56:53 2021 -0700 + + x86-64: Remove Prefer_AVX2_STRCMP + + Remove Prefer_AVX2_STRCMP to enable EVEX strcmp. When comparing 2 32-byte + strings, EVEX strcmp has been improved to require 1 load, 1 VPTESTM, 1 + VPCMP, 1 KMOVD and 1 INCL instead of 2 loads, 3 VPCMPs, 2 KORDs, 1 KMOVD + and 1 TESTL while AVX2 strcmp requires 1 load, 2 VPCMPEQs, 1 VPMINU, 1 + VPMOVMSKB and 1 TESTL. EVEX strcmp is now faster than AVX2 strcmp by up + to 40% on Tiger Lake and Ice Lake. + + (cherry picked from commit 14dbbf46a007ae5df36646b51ad0c9e5f5259f30) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index de4e3c3b7258120d..f4d4049e391cbabd 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -574,14 +574,6 @@ disable_tsx: + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) + cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; +- +- /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp +- requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp +- requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, +- AVX2 strcmp is faster than EVEX strcmp. */ +- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)) +- cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP] +- |= bit_arch_Prefer_AVX2_STRCMP; + } + + /* Avoid avoid short distance REP MOVSB on processor with FSRM. */ +diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c +index 58f2fad4323d5d91..957db3ad229ba39f 100644 +--- a/sysdeps/x86/cpu-tunables.c ++++ b/sysdeps/x86/cpu-tunables.c +@@ -239,8 +239,6 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) + CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features, + Fast_Copy_Backward, + disable, 18); +- CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH +- (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18); + } + break; + case 19: +diff --git a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +index 3bdc76cf71007948..8250bfcbecd29a9f 100644 +--- a/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def ++++ b/sysdeps/x86/include/cpu-features-preferred_feature_index_1.def +@@ -31,5 +31,4 @@ BIT (Prefer_ERMS) + BIT (Prefer_No_AVX512) + BIT (MathVec_Prefer_No_AVX512) + BIT (Prefer_FSRM) +-BIT (Prefer_AVX2_STRCMP) + BIT (Avoid_Short_Distance_REP_MOVSB) +diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c +index 62b7abeeee646ab4..7c2901bf44456259 100644 +--- a/sysdeps/x86_64/multiarch/strcmp.c ++++ b/sysdeps/x86_64/multiarch/strcmp.c +@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void) + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) +diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c +index 60ba0fe356b31779..f94a421784bfe923 100644 +--- a/sysdeps/x86_64/multiarch/strncmp.c ++++ b/sysdeps/x86_64/multiarch/strncmp.c +@@ -43,8 +43,7 @@ IFUNC_SELECTOR (void) + { + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL) + && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW) +- && CPU_FEATURE_USABLE_P (cpu_features, BMI2) +- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) ++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURE_USABLE_P (cpu_features, RTM)) diff --git a/glibc-upstream-2.34-180.patch b/glibc-upstream-2.34-180.patch new file mode 100644 index 0000000..9707cf2 --- /dev/null +++ b/glibc-upstream-2.34-180.patch @@ -0,0 +1,48 @@ +commit 2e64237a8744dd50f9222293275fa52e7248ff76 +Author: Fangrui Song +Date: Tue Nov 2 20:59:52 2021 -0700 + + x86-64: Replace movzx with movzbl + + Clang cannot assemble movzx in the AT&T dialect mode. + + ../sysdeps/x86_64/strcmp.S:2232:16: error: invalid operand for instruction + movzx (%rsi), %ecx + ^~~~ + + Change movzx to movzbl, which follows the AT&T dialect and is used + elsewhere in the file. + + Reviewed-by: H.J. Lu + (cherry picked from commit 6720d36b6623c5e48c070d86acf61198b33e144e) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S +index bc19547b09639071..6197a723b9e0606e 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S ++++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S +@@ -1771,8 +1771,8 @@ LABEL(strcmp_exitz): + .p2align 4 + // XXX Same as code above + LABEL(Byte0): +- movzx (%rsi), %ecx +- movzx (%rdi), %eax ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax + + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx +diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S +index 824e648230a15739..7f8a1bc756f86aee 100644 +--- a/sysdeps/x86_64/strcmp.S ++++ b/sysdeps/x86_64/strcmp.S +@@ -2232,8 +2232,8 @@ LABEL(strcmp_exitz): + + .p2align 4 + LABEL(Byte0): +- movzx (%rsi), %ecx +- movzx (%rdi), %eax ++ movzbl (%rsi), %ecx ++ movzbl (%rdi), %eax + + #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx diff --git a/glibc-upstream-2.34-181.patch b/glibc-upstream-2.34-181.patch new file mode 100644 index 0000000..36a401f --- /dev/null +++ b/glibc-upstream-2.34-181.patch @@ -0,0 +1,843 @@ +commit a7392db2ff2b9dd906500941ac6361dbe2211b0d +Author: Noah Goldstein +Date: Mon Nov 1 00:49:51 2021 -0500 + + x86: Optimize memmove-vec-unaligned-erms.S + + No bug. + + The optimizations are as follows: + + 1) Always align entry to 64 bytes. This makes behavior more + predictable and makes other frontend optimizations easier. + + 2) Make the L(more_8x_vec) cases 4k aliasing aware. This can have + significant benefits in the case that: + 0 < (dst - src) < [256, 512] + + 3) Align before `rep movsb`. For ERMS this is roughly a [0, 30%] + improvement and for FSRM [-10%, 25%]. + + In addition to these primary changes there is general cleanup + throughout to optimize the aligning routines and control flow logic. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit a6b7502ec0c2da89a7437f43171f160d713e39c6) + +diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S +index db106a7a1f23f268..b2b318084823dceb 100644 +--- a/sysdeps/x86_64/memmove.S ++++ b/sysdeps/x86_64/memmove.S +@@ -25,7 +25,7 @@ + /* Use movups and movaps for smaller code sizes. */ + #define VMOVU movups + #define VMOVA movaps +- ++#define MOV_SIZE 3 + #define SECTION(p) p + + #ifdef USE_MULTIARCH +diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +index 1ec1962e861dbf63..67a55f0c85af841c 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S +@@ -4,7 +4,7 @@ + # define VMOVNT vmovntdq + # define VMOVU vmovdqu + # define VMOVA vmovdqa +- ++# define MOV_SIZE 4 + # define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S +index e195e93f153c9512..975ae6c0515b83cb 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S +@@ -4,7 +4,7 @@ + # define VMOVNT vmovntdq + # define VMOVU vmovdqu + # define VMOVA vmovdqa +- ++# define MOV_SIZE 4 + # define SECTION(p) p##.avx + # define MEMMOVE_SYMBOL(p,s) p##_avx_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +index 848848ab39ff9326..0fa7126830af7acb 100644 +--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +@@ -25,7 +25,7 @@ + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + # define VZEROUPPER +- ++# define MOV_SIZE 6 + # define SECTION(p) p##.evex512 + # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +index 0cbce8f944da51a0..88715441feaaccf5 100644 +--- a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S +@@ -25,7 +25,7 @@ + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + # define VZEROUPPER +- ++# define MOV_SIZE 6 + # define SECTION(p) p##.evex + # define MEMMOVE_SYMBOL(p,s) p##_evex_##s + +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index abde8438d41f2320..7b27cbdda5fb99f7 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -76,6 +76,25 @@ + # endif + #endif + ++/* Whether to align before movsb. Ultimately we want 64 byte ++ align and not worth it to load 4x VEC for VEC_SIZE == 16. */ ++#define ALIGN_MOVSB (VEC_SIZE > 16) ++/* Number of bytes to align movsb to. */ ++#define MOVSB_ALIGN_TO 64 ++ ++#define SMALL_MOV_SIZE (MOV_SIZE <= 4) ++#define LARGE_MOV_SIZE (MOV_SIZE > 4) ++ ++#if SMALL_MOV_SIZE + LARGE_MOV_SIZE != 1 ++# error MOV_SIZE Unknown ++#endif ++ ++#if LARGE_MOV_SIZE ++# define SMALL_SIZE_OFFSET (4) ++#else ++# define SMALL_SIZE_OFFSET (0) ++#endif ++ + #ifndef PAGE_SIZE + # define PAGE_SIZE 4096 + #endif +@@ -199,25 +218,21 @@ L(start): + # endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) ++ /* Load regardless. */ ++ VMOVU (%rsi), %VEC(0) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(more_2x_vec) +-#if !defined USE_MULTIARCH || !IS_IN (libc) +-L(last_2x_vec): +-#endif + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU (%rsi), %VEC(0) + VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) +-#if !defined USE_MULTIARCH || !IS_IN (libc) +-L(nop): +- ret ++#if !(defined USE_MULTIARCH && IS_IN (libc)) ++ ZERO_UPPER_VEC_REGISTERS_RETURN + #else + VZEROUPPER_RETURN + #endif + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMMOVE_SYMBOL (__memmove, unaligned)) +- + # if VEC_SIZE == 16 + ENTRY (__mempcpy_chk_erms) + cmp %RDX_LP, %RCX_LP +@@ -289,7 +304,7 @@ ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) + END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) + # endif + +-ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) ++ENTRY_P2ALIGN (MEMMOVE_SYMBOL (__memmove, unaligned_erms), 6) + movq %rdi, %rax + L(start_erms): + # ifdef __ILP32__ +@@ -298,310 +313,448 @@ L(start_erms): + # endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) ++ /* Load regardless. */ ++ VMOVU (%rsi), %VEC(0) + cmp $(VEC_SIZE * 2), %RDX_LP + ja L(movsb_more_2x_vec) +-L(last_2x_vec): +- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) ++ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. ++ */ ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) ++ VMOVU %VEC(1), -VEC_SIZE(%rdi, %rdx) + L(return): +-#if VEC_SIZE > 16 ++# if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +-#else ++# else + ret ++# endif + #endif + +-L(movsb): +- cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP +- jae L(more_8x_vec) +- cmpq %rsi, %rdi +- jb 1f +- /* Source == destination is less common. */ +- je L(nop) +- leaq (%rsi,%rdx), %r9 +- cmpq %r9, %rdi +- /* Avoid slow backward REP MOVSB. */ +- jb L(more_8x_vec_backward) +-# if AVOID_SHORT_DISTANCE_REP_MOVSB +- testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) +- jz 3f +- movq %rdi, %rcx +- subq %rsi, %rcx +- jmp 2f +-# endif +-1: +-# if AVOID_SHORT_DISTANCE_REP_MOVSB +- testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) +- jz 3f +- movq %rsi, %rcx +- subq %rdi, %rcx +-2: +-/* Avoid "rep movsb" if RCX, the distance between source and destination, +- is N*4GB + [1..63] with N >= 0. */ +- cmpl $63, %ecx +- jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ +-3: +-# endif +- mov %RDX_LP, %RCX_LP +- rep movsb +-L(nop): ++#if LARGE_MOV_SIZE ++ /* If LARGE_MOV_SIZE this fits in the aligning bytes between the ++ ENTRY block and L(less_vec). */ ++ .p2align 4,, 8 ++L(between_4_7): ++ /* From 4 to 7. No branch when size == 4. */ ++ movl (%rsi), %ecx ++ movl (%rsi, %rdx), %esi ++ movl %ecx, (%rdi) ++ movl %esi, (%rdi, %rdx) + ret + #endif + ++ .p2align 4 + L(less_vec): + /* Less than 1 VEC. */ + #if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + # error Unsupported VEC_SIZE! + #endif + #if VEC_SIZE > 32 +- cmpb $32, %dl ++ cmpl $32, %edx + jae L(between_32_63) + #endif + #if VEC_SIZE > 16 +- cmpb $16, %dl ++ cmpl $16, %edx + jae L(between_16_31) + #endif +- cmpb $8, %dl ++ cmpl $8, %edx + jae L(between_8_15) +- cmpb $4, %dl ++#if SMALL_MOV_SIZE ++ cmpl $4, %edx ++#else ++ subq $4, %rdx ++#endif + jae L(between_4_7) +- cmpb $1, %dl +- ja L(between_2_3) +- jb 1f +- movzbl (%rsi), %ecx ++ cmpl $(1 - SMALL_SIZE_OFFSET), %edx ++ jl L(copy_0) ++ movb (%rsi), %cl ++ je L(copy_1) ++ movzwl (-2 + SMALL_SIZE_OFFSET)(%rsi, %rdx), %esi ++ movw %si, (-2 + SMALL_SIZE_OFFSET)(%rdi, %rdx) ++L(copy_1): + movb %cl, (%rdi) +-1: ++L(copy_0): + ret ++ ++#if SMALL_MOV_SIZE ++ .p2align 4,, 8 ++L(between_4_7): ++ /* From 4 to 7. No branch when size == 4. */ ++ movl -4(%rsi, %rdx), %ecx ++ movl (%rsi), %esi ++ movl %ecx, -4(%rdi, %rdx) ++ movl %esi, (%rdi) ++ ret ++#endif ++ ++#if VEC_SIZE > 16 ++ /* From 16 to 31. No branch when size == 16. */ ++ .p2align 4,, 8 ++L(between_16_31): ++ vmovdqu (%rsi), %xmm0 ++ vmovdqu -16(%rsi, %rdx), %xmm1 ++ vmovdqu %xmm0, (%rdi) ++ vmovdqu %xmm1, -16(%rdi, %rdx) ++ /* No ymm registers have been touched. */ ++ ret ++#endif ++ + #if VEC_SIZE > 32 ++ .p2align 4,, 10 + L(between_32_63): + /* From 32 to 63. No branch when size == 32. */ + VMOVU (%rsi), %YMM0 +- VMOVU -32(%rsi,%rdx), %YMM1 ++ VMOVU -32(%rsi, %rdx), %YMM1 + VMOVU %YMM0, (%rdi) +- VMOVU %YMM1, -32(%rdi,%rdx) +- VZEROUPPER_RETURN +-#endif +-#if VEC_SIZE > 16 +- /* From 16 to 31. No branch when size == 16. */ +-L(between_16_31): +- VMOVU (%rsi), %XMM0 +- VMOVU -16(%rsi,%rdx), %XMM1 +- VMOVU %XMM0, (%rdi) +- VMOVU %XMM1, -16(%rdi,%rdx) ++ VMOVU %YMM1, -32(%rdi, %rdx) + VZEROUPPER_RETURN + #endif ++ ++ .p2align 4,, 10 + L(between_8_15): + /* From 8 to 15. No branch when size == 8. */ +- movq -8(%rsi,%rdx), %rcx ++ movq -8(%rsi, %rdx), %rcx + movq (%rsi), %rsi +- movq %rcx, -8(%rdi,%rdx) + movq %rsi, (%rdi) ++ movq %rcx, -8(%rdi, %rdx) + ret +-L(between_4_7): +- /* From 4 to 7. No branch when size == 4. */ +- movl -4(%rsi,%rdx), %ecx +- movl (%rsi), %esi +- movl %ecx, -4(%rdi,%rdx) +- movl %esi, (%rdi) +- ret +-L(between_2_3): +- /* From 2 to 3. No branch when size == 2. */ +- movzwl -2(%rsi,%rdx), %ecx +- movzwl (%rsi), %esi +- movw %cx, -2(%rdi,%rdx) +- movw %si, (%rdi) +- ret + ++ .p2align 4,, 10 ++L(last_4x_vec): ++ /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */ ++ ++ /* VEC(0) and VEC(1) have already been loaded. */ ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(2) ++ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(3) ++ VMOVU %VEC(0), (%rdi) ++ VMOVU %VEC(1), VEC_SIZE(%rdi) ++ VMOVU %VEC(2), -VEC_SIZE(%rdi, %rdx) ++ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi, %rdx) ++ VZEROUPPER_RETURN ++ ++ .p2align 4 + #if defined USE_MULTIARCH && IS_IN (libc) + L(movsb_more_2x_vec): + cmp __x86_rep_movsb_threshold(%rip), %RDX_LP + ja L(movsb) + #endif + L(more_2x_vec): +- /* More than 2 * VEC and there may be overlap between destination +- and source. */ ++ /* More than 2 * VEC and there may be overlap between ++ destination and source. */ + cmpq $(VEC_SIZE * 8), %rdx + ja L(more_8x_vec) ++ /* Load VEC(1) regardless. VEC(0) has already been loaded. */ ++ VMOVU VEC_SIZE(%rsi), %VEC(1) + cmpq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec) +- /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) ++ /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. */ + VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) + VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(4) +- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5) +- VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6) +- VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7) ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(4) ++ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(5) ++ VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(6) ++ VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(7) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), VEC_SIZE(%rdi) + VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi) + VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi) +- VMOVU %VEC(4), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) +- VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) +- VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) +- VZEROUPPER_RETURN +-L(last_4x_vec): +- /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2) +- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3) +- VMOVU %VEC(0), (%rdi) +- VMOVU %VEC(1), VEC_SIZE(%rdi) +- VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) +- VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) ++ VMOVU %VEC(4), -VEC_SIZE(%rdi, %rdx) ++ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi, %rdx) ++ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi, %rdx) ++ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi, %rdx) + VZEROUPPER_RETURN + ++ .p2align 4,, 4 + L(more_8x_vec): ++ movq %rdi, %rcx ++ subq %rsi, %rcx ++ /* Go to backwards temporal copy if overlap no matter what as ++ backward REP MOVSB is slow and we don't want to use NT stores if ++ there is overlap. */ ++ cmpq %rdx, %rcx ++ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */ ++ jb L(more_8x_vec_backward_check_nop) + /* Check if non-temporal move candidate. */ + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. */ +- cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_memcpy_2x) + #endif +- /* Entry if rdx is greater than non-temporal threshold but there +- is overlap. */ ++ /* To reach this point there cannot be overlap and dst > src. So ++ check for overlap and src > dst in which case correctness ++ requires forward copy. Otherwise decide between backward/forward ++ copy depending on address aliasing. */ ++ ++ /* Entry if rdx is greater than __x86_rep_movsb_stop_threshold ++ but less than __x86_shared_non_temporal_threshold. */ + L(more_8x_vec_check): +- cmpq %rsi, %rdi +- ja L(more_8x_vec_backward) +- /* Source == destination is less common. */ +- je L(nop) +- /* Load the first VEC and last 4 * VEC to support overlapping +- addresses. */ +- VMOVU (%rsi), %VEC(4) ++ /* rcx contains dst - src. Add back length (rdx). */ ++ leaq (%rcx, %rdx), %r8 ++ /* If r8 has different sign than rcx then there is overlap so we ++ must do forward copy. */ ++ xorq %rcx, %r8 ++ /* Isolate just sign bit of r8. */ ++ shrq $63, %r8 ++ /* Get 4k difference dst - src. */ ++ andl $(PAGE_SIZE - 256), %ecx ++ /* If r8 is non-zero must do foward for correctness. Otherwise ++ if ecx is non-zero there is 4k False Alaising so do backward ++ copy. */ ++ addl %r8d, %ecx ++ jz L(more_8x_vec_backward) ++ ++ /* if rdx is greater than __x86_shared_non_temporal_threshold ++ but there is overlap, or from short distance movsb. */ ++L(more_8x_vec_forward): ++ /* Load first and last 4 * VEC to support overlapping addresses. ++ */ ++ ++ /* First vec was already loaded into VEC(0). */ + VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5) + VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6) ++ /* Save begining of dst. */ ++ movq %rdi, %rcx ++ /* Align dst to VEC_SIZE - 1. */ ++ orq $(VEC_SIZE - 1), %rdi + VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7) + VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8) +- /* Save start and stop of the destination buffer. */ +- movq %rdi, %r11 +- leaq -VEC_SIZE(%rdi, %rdx), %rcx +- /* Align destination for aligned stores in the loop. Compute +- how much destination is misaligned. */ +- movq %rdi, %r8 +- andq $(VEC_SIZE - 1), %r8 +- /* Get the negative of offset for alignment. */ +- subq $VEC_SIZE, %r8 +- /* Adjust source. */ +- subq %r8, %rsi +- /* Adjust destination which should be aligned now. */ +- subq %r8, %rdi +- /* Adjust length. */ +- addq %r8, %rdx + +- .p2align 4 ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rcx, %rsi ++ /* Finish aligning dst. */ ++ incq %rdi ++ /* Restore src adjusted with new value for aligned dst. */ ++ addq %rdi, %rsi ++ /* Store end of buffer minus tail in rdx. */ ++ leaq (VEC_SIZE * -4)(%rcx, %rdx), %rdx ++ ++ /* Dont use multi-byte nop to align. */ ++ .p2align 4,, 11 + L(loop_4x_vec_forward): + /* Copy 4 * VEC a time forward. */ +- VMOVU (%rsi), %VEC(0) +- VMOVU VEC_SIZE(%rsi), %VEC(1) +- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) +- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) ++ VMOVU (%rsi), %VEC(1) ++ VMOVU VEC_SIZE(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(3) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(4) + subq $-(VEC_SIZE * 4), %rsi +- addq $-(VEC_SIZE * 4), %rdx +- VMOVA %VEC(0), (%rdi) +- VMOVA %VEC(1), VEC_SIZE(%rdi) +- VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi) +- VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi) ++ VMOVA %VEC(1), (%rdi) ++ VMOVA %VEC(2), VEC_SIZE(%rdi) ++ VMOVA %VEC(3), (VEC_SIZE * 2)(%rdi) ++ VMOVA %VEC(4), (VEC_SIZE * 3)(%rdi) + subq $-(VEC_SIZE * 4), %rdi +- cmpq $(VEC_SIZE * 4), %rdx ++ cmpq %rdi, %rdx + ja L(loop_4x_vec_forward) + /* Store the last 4 * VEC. */ +- VMOVU %VEC(5), (%rcx) +- VMOVU %VEC(6), -VEC_SIZE(%rcx) +- VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx) +- VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) ++ VMOVU %VEC(5), (VEC_SIZE * 3)(%rdx) ++ VMOVU %VEC(6), (VEC_SIZE * 2)(%rdx) ++ VMOVU %VEC(7), VEC_SIZE(%rdx) ++ VMOVU %VEC(8), (%rdx) + /* Store the first VEC. */ +- VMOVU %VEC(4), (%r11) ++ VMOVU %VEC(0), (%rcx) ++ /* Keep L(nop_backward) target close to jmp for 2-byte encoding. ++ */ ++L(nop_backward): + VZEROUPPER_RETURN + ++ .p2align 4,, 8 ++L(more_8x_vec_backward_check_nop): ++ /* rcx contains dst - src. Test for dst == src to skip all of ++ memmove. */ ++ testq %rcx, %rcx ++ jz L(nop_backward) + L(more_8x_vec_backward): + /* Load the first 4 * VEC and last VEC to support overlapping + addresses. */ +- VMOVU (%rsi), %VEC(4) ++ ++ /* First vec was also loaded into VEC(0). */ + VMOVU VEC_SIZE(%rsi), %VEC(5) + VMOVU (VEC_SIZE * 2)(%rsi), %VEC(6) ++ /* Begining of region for 4x backward copy stored in rcx. */ ++ leaq (VEC_SIZE * -4 + -1)(%rdi, %rdx), %rcx + VMOVU (VEC_SIZE * 3)(%rsi), %VEC(7) +- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(8) +- /* Save stop of the destination buffer. */ +- leaq -VEC_SIZE(%rdi, %rdx), %r11 +- /* Align destination end for aligned stores in the loop. Compute +- how much destination end is misaligned. */ +- leaq -VEC_SIZE(%rsi, %rdx), %rcx +- movq %r11, %r9 +- movq %r11, %r8 +- andq $(VEC_SIZE - 1), %r8 +- /* Adjust source. */ +- subq %r8, %rcx +- /* Adjust the end of destination which should be aligned now. */ +- subq %r8, %r9 +- /* Adjust length. */ +- subq %r8, %rdx +- +- .p2align 4 ++ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(8) ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rdi, %rsi ++ /* Align dst. */ ++ andq $-(VEC_SIZE), %rcx ++ /* Restore src. */ ++ addq %rcx, %rsi ++ ++ /* Don't use multi-byte nop to align. */ ++ .p2align 4,, 11 + L(loop_4x_vec_backward): + /* Copy 4 * VEC a time backward. */ +- VMOVU (%rcx), %VEC(0) +- VMOVU -VEC_SIZE(%rcx), %VEC(1) +- VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2) +- VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3) +- addq $-(VEC_SIZE * 4), %rcx +- addq $-(VEC_SIZE * 4), %rdx +- VMOVA %VEC(0), (%r9) +- VMOVA %VEC(1), -VEC_SIZE(%r9) +- VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9) +- VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9) +- addq $-(VEC_SIZE * 4), %r9 +- cmpq $(VEC_SIZE * 4), %rdx +- ja L(loop_4x_vec_backward) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(1) ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 1)(%rsi), %VEC(3) ++ VMOVU (VEC_SIZE * 0)(%rsi), %VEC(4) ++ addq $(VEC_SIZE * -4), %rsi ++ VMOVA %VEC(1), (VEC_SIZE * 3)(%rcx) ++ VMOVA %VEC(2), (VEC_SIZE * 2)(%rcx) ++ VMOVA %VEC(3), (VEC_SIZE * 1)(%rcx) ++ VMOVA %VEC(4), (VEC_SIZE * 0)(%rcx) ++ addq $(VEC_SIZE * -4), %rcx ++ cmpq %rcx, %rdi ++ jb L(loop_4x_vec_backward) + /* Store the first 4 * VEC. */ +- VMOVU %VEC(4), (%rdi) ++ VMOVU %VEC(0), (%rdi) + VMOVU %VEC(5), VEC_SIZE(%rdi) + VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi) + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. */ +- VMOVU %VEC(8), (%r11) ++ VMOVU %VEC(8), -VEC_SIZE(%rdx, %rdi) ++ VZEROUPPER_RETURN ++ ++#if defined USE_MULTIARCH && IS_IN (libc) ++ /* L(skip_short_movsb_check) is only used with ERMS. Not for ++ FSRM. */ ++ .p2align 5,, 16 ++# if ALIGN_MOVSB ++L(skip_short_movsb_check): ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# endif ++# if MOVSB_ALIGN_TO > (VEC_SIZE * 2) ++# error Unsupported MOVSB_ALIGN_TO ++# endif ++ /* If CPU does not have FSRM two options for aligning. Align src ++ if dst and src 4k alias. Otherwise align dst. */ ++ testl $(PAGE_SIZE - 512), %ecx ++ jnz L(movsb_align_dst) ++ /* Fall through. dst and src 4k alias. It's better to align src ++ here because the bottleneck will be loads dues to the false ++ dependency on dst. */ ++ ++ /* rcx already has dst - src. */ ++ movq %rcx, %r9 ++ /* Add src to len. Subtract back after src aligned. -1 because ++ src is initially aligned to MOVSB_ALIGN_TO - 1. */ ++ leaq -1(%rsi, %rdx), %rcx ++ /* Inclusively align src to MOVSB_ALIGN_TO - 1. */ ++ orq $(MOVSB_ALIGN_TO - 1), %rsi ++ /* Restore dst and len adjusted with new values for aligned dst. ++ */ ++ leaq 1(%rsi, %r9), %rdi ++ subq %rsi, %rcx ++ /* Finish aligning src. */ ++ incq %rsi ++ ++ rep movsb ++ ++ VMOVU %VEC(0), (%r8) ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU %VEC(1), VEC_SIZE(%r8) ++# endif + VZEROUPPER_RETURN ++# endif ++ ++ .p2align 4,, 12 ++L(movsb): ++ movq %rdi, %rcx ++ subq %rsi, %rcx ++ /* Go to backwards temporal copy if overlap no matter what as ++ backward REP MOVSB is slow and we don't want to use NT stores if ++ there is overlap. */ ++ cmpq %rdx, %rcx ++ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */ ++ jb L(more_8x_vec_backward_check_nop) ++# if ALIGN_MOVSB ++ /* Save dest for storing aligning VECs later. */ ++ movq %rdi, %r8 ++# endif ++ /* If above __x86_rep_movsb_stop_threshold most likely is ++ candidate for NT moves aswell. */ ++ cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP ++ jae L(large_memcpy_2x_check) ++# if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB ++ /* Only avoid short movsb if CPU has FSRM. */ ++ testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip) ++ jz L(skip_short_movsb_check) ++# if AVOID_SHORT_DISTANCE_REP_MOVSB ++ /* Avoid "rep movsb" if RCX, the distance between source and ++ destination, is N*4GB + [1..63] with N >= 0. */ ++ ++ /* ecx contains dst - src. Early check for backward copy ++ conditions means only case of slow movsb with src = dst + [0, ++ 63] is ecx in [-63, 0]. Use unsigned comparison with -64 check ++ for that case. */ ++ cmpl $-64, %ecx ++ ja L(more_8x_vec_forward) ++# endif ++# endif ++# if ALIGN_MOVSB ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# endif ++# if MOVSB_ALIGN_TO > (VEC_SIZE * 2) ++# error Unsupported MOVSB_ALIGN_TO ++# endif ++ /* Fall through means cpu has FSRM. In that case exclusively ++ align destination. */ ++L(movsb_align_dst): ++ /* Subtract dst from src. Add back after dst aligned. */ ++ subq %rdi, %rsi ++ /* Exclusively align dst to MOVSB_ALIGN_TO (64). */ ++ addq $(MOVSB_ALIGN_TO - 1), %rdi ++ /* Add dst to len. Subtract back after dst aligned. */ ++ leaq (%r8, %rdx), %rcx ++ /* Finish aligning dst. */ ++ andq $-(MOVSB_ALIGN_TO), %rdi ++ /* Restore src and len adjusted with new values for aligned dst. ++ */ ++ addq %rdi, %rsi ++ subq %rdi, %rcx ++ ++ rep movsb ++ ++ /* Store VECs loaded for aligning. */ ++ VMOVU %VEC(0), (%r8) ++# if MOVSB_ALIGN_TO > VEC_SIZE ++ VMOVU %VEC(1), VEC_SIZE(%r8) ++# endif ++ VZEROUPPER_RETURN ++# else /* !ALIGN_MOVSB. */ ++L(skip_short_movsb_check): ++ mov %RDX_LP, %RCX_LP ++ rep movsb ++ ret ++# endif ++#endif + ++ .p2align 4,, 10 + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) +- .p2align 4 ++L(large_memcpy_2x_check): ++ cmp __x86_rep_movsb_threshold(%rip), %RDX_LP ++ jb L(more_8x_vec_check) + L(large_memcpy_2x): +- /* Compute absolute value of difference between source and +- destination. */ +- movq %rdi, %r9 +- subq %rsi, %r9 +- movq %r9, %r8 +- leaq -1(%r9), %rcx +- sarq $63, %r8 +- xorq %r8, %r9 +- subq %r8, %r9 +- /* Don't use non-temporal store if there is overlap between +- destination and source since destination may be in cache when +- source is loaded. */ +- cmpq %r9, %rdx +- ja L(more_8x_vec_check) ++ /* To reach this point it is impossible for dst > src and ++ overlap. Remaining to check is src > dst and overlap. rcx ++ already contains dst - src. Negate rcx to get src - dst. If ++ length > rcx then there is overlap and forward copy is best. */ ++ negq %rcx ++ cmpq %rcx, %rdx ++ ja L(more_8x_vec_forward) + + /* Cache align destination. First store the first 64 bytes then + adjust alignments. */ +- VMOVU (%rsi), %VEC(8) +-#if VEC_SIZE < 64 +- VMOVU VEC_SIZE(%rsi), %VEC(9) +-#if VEC_SIZE < 32 +- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(10) +- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(11) +-#endif +-#endif +- VMOVU %VEC(8), (%rdi) +-#if VEC_SIZE < 64 +- VMOVU %VEC(9), VEC_SIZE(%rdi) +-#if VEC_SIZE < 32 +- VMOVU %VEC(10), (VEC_SIZE * 2)(%rdi) +- VMOVU %VEC(11), (VEC_SIZE * 3)(%rdi) +-#endif +-#endif ++ ++ /* First vec was also loaded into VEC(0). */ ++# if VEC_SIZE < 64 ++ VMOVU VEC_SIZE(%rsi), %VEC(1) ++# if VEC_SIZE < 32 ++ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2) ++ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3) ++# endif ++# endif ++ VMOVU %VEC(0), (%rdi) ++# if VEC_SIZE < 64 ++ VMOVU %VEC(1), VEC_SIZE(%rdi) ++# if VEC_SIZE < 32 ++ VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi) ++ VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi) ++# endif ++# endif ++ + /* Adjust source, destination, and size. */ + movq %rdi, %r8 + andq $63, %r8 +@@ -614,9 +767,13 @@ L(large_memcpy_2x): + /* Adjust length. */ + addq %r8, %rdx + +- /* Test if source and destination addresses will alias. If they do +- the larger pipeline in large_memcpy_4x alleviated the ++ /* Test if source and destination addresses will alias. If they ++ do the larger pipeline in large_memcpy_4x alleviated the + performance drop. */ ++ ++ /* ecx contains -(dst - src). not ecx will return dst - src - 1 ++ which works for testing aliasing. */ ++ notl %ecx + testl $(PAGE_SIZE - VEC_SIZE * 8), %ecx + jz L(large_memcpy_4x) + +@@ -704,8 +861,8 @@ L(loop_large_memcpy_4x_outer): + /* ecx stores inner loop counter. */ + movl $(PAGE_SIZE / LARGE_LOAD_SIZE), %ecx + L(loop_large_memcpy_4x_inner): +- /* Only one prefetch set per page as doing 4 pages give more time +- for prefetcher to keep up. */ ++ /* Only one prefetch set per page as doing 4 pages give more ++ time for prefetcher to keep up. */ + PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE) + PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE) + PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE * 2 + PREFETCHED_LOAD_SIZE) diff --git a/glibc-upstream-2.34-182.patch b/glibc-upstream-2.34-182.patch new file mode 100644 index 0000000..563ff9d --- /dev/null +++ b/glibc-upstream-2.34-182.patch @@ -0,0 +1,131 @@ +commit cecbac52123456e2fbcff062a4165bf7b9174797 +Author: Noah Goldstein +Date: Mon Nov 1 00:49:52 2021 -0500 + + x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h + + No bug. + + This patch doubles the rep_movsb_threshold when using ERMS. Based on + benchmarks the vector copy loop, especially now that it handles 4k + aliasing, is better for these medium ranged. + + On Skylake with ERMS: + + Size, Align1, Align2, dst>src,(rep movsb) / (vec copy) + 4096, 0, 0, 0, 0.975 + 4096, 0, 0, 1, 0.953 + 4096, 12, 0, 0, 0.969 + 4096, 12, 0, 1, 0.872 + 4096, 44, 0, 0, 0.979 + 4096, 44, 0, 1, 0.83 + 4096, 0, 12, 0, 1.006 + 4096, 0, 12, 1, 0.989 + 4096, 0, 44, 0, 0.739 + 4096, 0, 44, 1, 0.942 + 4096, 12, 12, 0, 1.009 + 4096, 12, 12, 1, 0.973 + 4096, 44, 44, 0, 0.791 + 4096, 44, 44, 1, 0.961 + 4096, 2048, 0, 0, 0.978 + 4096, 2048, 0, 1, 0.951 + 4096, 2060, 0, 0, 0.986 + 4096, 2060, 0, 1, 0.963 + 4096, 2048, 12, 0, 0.971 + 4096, 2048, 12, 1, 0.941 + 4096, 2060, 12, 0, 0.977 + 4096, 2060, 12, 1, 0.949 + 8192, 0, 0, 0, 0.85 + 8192, 0, 0, 1, 0.845 + 8192, 13, 0, 0, 0.937 + 8192, 13, 0, 1, 0.939 + 8192, 45, 0, 0, 0.932 + 8192, 45, 0, 1, 0.927 + 8192, 0, 13, 0, 0.621 + 8192, 0, 13, 1, 0.62 + 8192, 0, 45, 0, 0.53 + 8192, 0, 45, 1, 0.516 + 8192, 13, 13, 0, 0.664 + 8192, 13, 13, 1, 0.659 + 8192, 45, 45, 0, 0.593 + 8192, 45, 45, 1, 0.575 + 8192, 2048, 0, 0, 0.854 + 8192, 2048, 0, 1, 0.834 + 8192, 2061, 0, 0, 0.863 + 8192, 2061, 0, 1, 0.857 + 8192, 2048, 13, 0, 0.63 + 8192, 2048, 13, 1, 0.629 + 8192, 2061, 13, 0, 0.627 + 8192, 2061, 13, 1, 0.62 + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit 475b63702ef38b69558fc3d31a0b66776a70f1d3) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index e6c94dfd023a25dc..2e43e67e4f4037d3 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -866,12 +866,14 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */ + unsigned int minimum_rep_movsb_threshold; + #endif +- /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */ ++ /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for ++ VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB ++ threshold is 2048 * (VEC_SIZE / 16). */ + unsigned int rep_movsb_threshold; + if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F) + && !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512)) + { +- rep_movsb_threshold = 2048 * (64 / 16); ++ rep_movsb_threshold = 4096 * (64 / 16); + #if HAVE_TUNABLES + minimum_rep_movsb_threshold = 64 * 8; + #endif +@@ -879,7 +881,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + else if (CPU_FEATURE_PREFERRED_P (cpu_features, + AVX_Fast_Unaligned_Load)) + { +- rep_movsb_threshold = 2048 * (32 / 16); ++ rep_movsb_threshold = 4096 * (32 / 16); + #if HAVE_TUNABLES + minimum_rep_movsb_threshold = 32 * 8; + #endif +diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list +index dd6e1d65c9490d4f..419313804d49cf65 100644 +--- a/sysdeps/x86/dl-tunables.list ++++ b/sysdeps/x86/dl-tunables.list +@@ -32,17 +32,21 @@ glibc { + } + x86_rep_movsb_threshold { + type: SIZE_T +- # Since there is overhead to set up REP MOVSB operation, REP MOVSB +- # isn't faster on short data. The memcpy micro benchmark in glibc +- # shows that 2KB is the approximate value above which REP MOVSB +- # becomes faster than SSE2 optimization on processors with Enhanced +- # REP MOVSB. Since larger register size can move more data with a +- # single load and store, the threshold is higher with larger register +- # size. Note: Since the REP MOVSB threshold must be greater than 8 +- # times of vector size and the default value is 2048 * (vector size +- # / 16), the default value and the minimum value must be updated at +- # run-time. NB: Don't set the default value since we can't tell if +- # the tunable value is set by user or not [BZ #27069]. ++ # Since there is overhead to set up REP MOVSB operation, REP ++ # MOVSB isn't faster on short data. The memcpy micro benchmark ++ # in glibc shows that 2KB is the approximate value above which ++ # REP MOVSB becomes faster than SSE2 optimization on processors ++ # with Enhanced REP MOVSB. Since larger register size can move ++ # more data with a single load and store, the threshold is ++ # higher with larger register size. Micro benchmarks show AVX ++ # REP MOVSB becomes faster apprximately at 8KB. The AVX512 ++ # threshold is extrapolated to 16KB. For machines with FSRM the ++ # threshold is universally set at 2112 bytes. Note: Since the ++ # REP MOVSB threshold must be greater than 8 times of vector ++ # size and the default value is 4096 * (vector size / 16), the ++ # default value and the minimum value must be updated at ++ # run-time. NB: Don't set the default value since we can't tell ++ # if the tunable value is set by user or not [BZ #27069]. + minval: 1 + } + x86_rep_stosb_threshold { diff --git a/glibc-upstream-2.34-183.patch b/glibc-upstream-2.34-183.patch new file mode 100644 index 0000000..a1a7285 --- /dev/null +++ b/glibc-upstream-2.34-183.patch @@ -0,0 +1,2423 @@ +commit 7cb126e7e7febf9dc3e369cc3e4885e34fb9433b +Author: Noah Goldstein +Date: Wed Nov 10 16:18:56 2021 -0600 + + x86: Shrink memcmp-sse4.S code size + + No bug. + + This implementation refactors memcmp-sse4.S primarily with minimizing + code size in mind. It does this by removing the lookup table logic and + removing the unrolled check from (256, 512] bytes. + + memcmp-sse4 code size reduction : -3487 bytes + wmemcmp-sse4 code size reduction: -1472 bytes + + The current memcmp-sse4.S implementation has a large code size + cost. This has serious adverse affects on the ICache / ITLB. While + in micro-benchmarks the implementations appears fast, traces of + real-world code have shown that the speed in micro benchmarks does not + translate when the ICache/ITLB are not primed, and that the cost + of the code size has measurable negative affects on overall + application performance. + + See https://research.google/pubs/pub48320/ for more details. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit 2f9062d7171850451e6044ef78d91ff8c017b9c0) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S +index b7ac034569ec6178..97c102a9c5ab2b91 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S ++++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S +@@ -25,14 +25,14 @@ + # define MEMCMP __memcmp_sse4_1 + # endif + +-# define JMPTBL(I, B) (I - B) ++#ifdef USE_AS_WMEMCMP ++# define CMPEQ pcmpeqd ++# define CHAR_SIZE 4 ++#else ++# define CMPEQ pcmpeqb ++# define CHAR_SIZE 1 ++#endif + +-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ +- lea TABLE(%rip), %r11; \ +- movslq (%r11, INDEX, SCALE), %rcx; \ +- add %r11, %rcx; \ +- _CET_NOTRACK jmp *%rcx; \ +- ud2 + + /* Warning! + wmemcmp has to use SIGNED comparison for elements. +@@ -47,33 +47,253 @@ ENTRY (MEMCMP) + /* Clear the upper 32 bits. */ + mov %edx, %edx + # endif +- pxor %xmm0, %xmm0 + cmp $79, %RDX_LP + ja L(79bytesormore) ++ ++ cmp $CHAR_SIZE, %RDX_LP ++ jbe L(firstbyte) ++ ++ /* N in (CHAR_SIZE, 79) bytes. */ ++ cmpl $32, %edx ++ ja L(more_32_bytes) ++ ++ cmpl $16, %edx ++ jae L(16_to_32_bytes) ++ + # ifndef USE_AS_WMEMCMP +- cmp $1, %RDX_LP +- je L(firstbyte) ++ cmpl $8, %edx ++ jae L(8_to_16_bytes) ++ ++ cmpl $4, %edx ++ jb L(2_to_3_bytes) ++ ++ movl (%rdi), %eax ++ movl (%rsi), %ecx ++ ++ bswap %eax ++ bswap %ecx ++ ++ shlq $32, %rax ++ shlq $32, %rcx ++ ++ movl -4(%rdi, %rdx), %edi ++ movl -4(%rsi, %rdx), %esi ++ ++ bswap %edi ++ bswap %esi ++ ++ orq %rdi, %rax ++ orq %rsi, %rcx ++ subq %rcx, %rax ++ cmovne %edx, %eax ++ sbbl %ecx, %ecx ++ orl %ecx, %eax ++ ret ++ ++ .p2align 4,, 8 ++L(2_to_3_bytes): ++ movzwl (%rdi), %eax ++ movzwl (%rsi), %ecx ++ shll $8, %eax ++ shll $8, %ecx ++ bswap %eax ++ bswap %ecx ++ movzbl -1(%rdi, %rdx), %edi ++ movzbl -1(%rsi, %rdx), %esi ++ orl %edi, %eax ++ orl %esi, %ecx ++ subl %ecx, %eax ++ ret ++ ++ .p2align 4,, 8 ++L(8_to_16_bytes): ++ movq (%rdi), %rax ++ movq (%rsi), %rcx ++ ++ bswap %rax ++ bswap %rcx ++ ++ subq %rcx, %rax ++ jne L(8_to_16_bytes_done) ++ ++ movq -8(%rdi, %rdx), %rax ++ movq -8(%rsi, %rdx), %rcx ++ ++ bswap %rax ++ bswap %rcx ++ ++ subq %rcx, %rax ++ ++L(8_to_16_bytes_done): ++ cmovne %edx, %eax ++ sbbl %ecx, %ecx ++ orl %ecx, %eax ++ ret ++# else ++ xorl %eax, %eax ++ movl (%rdi), %ecx ++ cmpl (%rsi), %ecx ++ jne L(8_to_16_bytes_done) ++ movl 4(%rdi), %ecx ++ cmpl 4(%rsi), %ecx ++ jne L(8_to_16_bytes_done) ++ movl -4(%rdi, %rdx), %ecx ++ cmpl -4(%rsi, %rdx), %ecx ++ jne L(8_to_16_bytes_done) ++ ret + # endif +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) + +-# ifndef USE_AS_WMEMCMP +- .p2align 4 ++ .p2align 4,, 3 ++L(ret_zero): ++ xorl %eax, %eax ++L(zero): ++ ret ++ ++ .p2align 4,, 8 + L(firstbyte): ++ jb L(ret_zero) ++# ifdef USE_AS_WMEMCMP ++ xorl %eax, %eax ++ movl (%rdi), %ecx ++ cmpl (%rsi), %ecx ++ je L(zero) ++L(8_to_16_bytes_done): ++ setg %al ++ leal -1(%rax, %rax), %eax ++# else + movzbl (%rdi), %eax + movzbl (%rsi), %ecx + sub %ecx, %eax ++# endif + ret ++ ++ .p2align 4 ++L(vec_return_begin_48): ++ addq $16, %rdi ++ addq $16, %rsi ++L(vec_return_begin_32): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl 32(%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl 32(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl 32(%rsi, %rax), %ecx ++ movzbl 32(%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(vec_return_begin_16): ++ addq $16, %rdi ++ addq $16, %rsi ++L(vec_return_begin): ++ bsfl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl (%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ .p2align 4 ++L(vec_return_end_16): ++ subl $16, %edx ++L(vec_return_end): ++ bsfl %eax, %eax ++ addl %edx, %eax ++# ifdef USE_AS_WMEMCMP ++ movl -16(%rdi, %rax), %ecx ++ xorl %edx, %edx ++ cmpl -16(%rsi, %rax), %ecx ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl -16(%rsi, %rax), %ecx ++ movzbl -16(%rdi, %rax), %eax ++ subl %ecx, %eax + # endif ++ ret ++ ++ .p2align 4,, 8 ++L(more_32_bytes): ++ movdqu (%rdi), %xmm0 ++ movdqu (%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm0 ++ movdqu 16(%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ cmpl $64, %edx ++ jbe L(32_to_64_bytes) ++ movdqu 32(%rdi), %xmm0 ++ movdqu 32(%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ .p2align 4,, 6 ++L(32_to_64_bytes): ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret ++ ++ .p2align 4 ++L(16_to_32_bytes): ++ movdqu (%rdi), %xmm0 ++ movdqu (%rsi), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret ++ + + .p2align 4 + L(79bytesormore): ++ movdqu (%rdi), %xmm0 + movdqu (%rsi), %xmm1 +- movdqu (%rdi), %xmm2 +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ + mov %rsi, %rcx + and $-16, %rsi + add $16, %rsi +@@ -86,1694 +306,499 @@ L(79bytesormore): + + cmp $128, %rdx + ja L(128bytesormore) +-L(less128bytes): +- sub $64, %rdx +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) + +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- cmp $32, %rdx +- jb L(less32bytesin64) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin64): +- add $64, %rdi +- add $64, %rsi +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ .p2align 4,, 6 ++L(less128bytes): ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ cmp $96, %rdx ++ jb L(32_to_64_bytes) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ subq $64, %rdx ++ ++ .p2align 4,, 6 ++L(last_64_bytes): ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + ++ .p2align 4 + L(128bytesormore): +- cmp $512, %rdx +- ja L(512bytesormore) + cmp $256, %rdx +- ja L(less512bytes) ++ ja L(unaligned_loop) + L(less256bytes): +- sub $128, %rdx +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqu 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqu 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- add $128, %rsi +- add $128, %rdi +- +- cmp $64, %rdx +- jae L(less128bytes) +- +- cmp $32, %rdx +- jb L(less32bytesin128) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin128): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +- +-L(less512bytes): +- sub $256, %rdx +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqu 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqu 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqu 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqu 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqu 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqu 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- movdqu 128(%rdi), %xmm2 +- pxor 128(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(144bytesin256) +- +- movdqu 144(%rdi), %xmm2 +- pxor 144(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(160bytesin256) +- +- movdqu 160(%rdi), %xmm2 +- pxor 160(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(176bytesin256) +- +- movdqu 176(%rdi), %xmm2 +- pxor 176(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(192bytesin256) +- +- movdqu 192(%rdi), %xmm2 +- pxor 192(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(208bytesin256) +- +- movdqu 208(%rdi), %xmm2 +- pxor 208(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(224bytesin256) +- +- movdqu 224(%rdi), %xmm2 +- pxor 224(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(240bytesin256) +- +- movdqu 240(%rdi), %xmm2 +- pxor 240(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(256bytesin256) +- +- add $256, %rsi +- add $256, %rdi +- +- cmp $128, %rdx +- jae L(less256bytes) ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ ++ movdqu (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqu 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqu 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $-128, %rdx ++ subq $-64, %rsi ++ subq $-64, %rdi + + cmp $64, %rdx +- jae L(less128bytes) ++ ja L(less128bytes) + + cmp $32, %rdx +- jb L(less32bytesin256) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin256): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ ja L(last_64_bytes) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 +-L(512bytesormore): ++L(unaligned_loop): + # ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %R8_LP + # else + mov __x86_data_cache_size_half(%rip), %R8_LP + # endif +- mov %r8, %r9 +- shr $1, %r8 +- add %r9, %r8 +- cmp %r8, %rdx +- ja L(L2_L3_cache_unaglined) ++ movq %r8, %r9 ++ addq %r8, %r8 ++ addq %r9, %r8 ++ cmpq %r8, %rdx ++ ja L(L2_L3_cache_unaligned) + sub $64, %rdx + .p2align 4 + L(64bytesormore_loop): +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 + +- movdqu 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqu 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- movdqu 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(64bytesormore_loop) ++ ja L(64bytesormore_loop) + +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ .p2align 4,, 6 ++L(loop_tail): ++ addq %rdx, %rdi ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 ++ ++ addq %rdx, %rsi ++ movdqu (%rsi), %xmm4 ++ movdqu 16(%rsi), %xmm5 ++ movdqu 32(%rsi), %xmm6 ++ movdqu 48(%rsi), %xmm7 ++ ++ CMPEQ %xmm4, %xmm0 ++ CMPEQ %xmm5, %xmm1 ++ CMPEQ %xmm6, %xmm2 ++ CMPEQ %xmm7, %xmm3 ++ ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 ++ ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) ++ ret + +-L(L2_L3_cache_unaglined): +- sub $64, %rdx ++L(L2_L3_cache_unaligned): ++ subq $64, %rdx + .p2align 4 + L(L2_L3_unaligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 + +- movdqu 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqu (%rdi), %xmm0 ++ movdqu 16(%rdi), %xmm1 ++ movdqu 32(%rdi), %xmm2 ++ movdqu 48(%rdi), %xmm3 ++ ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqu 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- movdqu 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(L2_L3_unaligned_128bytes_loop) ++ ja L(L2_L3_unaligned_128bytes_loop) ++ jmp L(loop_tail) + +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) + +-/* +- * This case is for machines which are sensitive for unaligned instructions. +- */ ++ /* This case is for machines which are sensitive for unaligned ++ * instructions. */ + .p2align 4 + L(2aligned): + cmp $128, %rdx + ja L(128bytesormorein2aligned) + L(less128bytesin2aligned): +- sub $64, %rdx +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- cmp $32, %rdx +- jb L(less32bytesin64in2alinged) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin64in2alinged): +- add $64, %rdi +- add $64, %rsi +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ cmp $96, %rdx ++ jb L(32_to_64_bytes) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ subq $64, %rdx ++ ++ .p2align 4,, 6 ++L(aligned_last_64_bytes): ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 + L(128bytesormorein2aligned): +- cmp $512, %rdx +- ja L(512bytesormorein2aligned) + cmp $256, %rdx +- ja L(256bytesormorein2aligned) ++ ja L(aligned_loop) + L(less256bytesin2alinged): +- sub $128, %rdx +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqa 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqa 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- add $128, %rsi +- add $128, %rdi ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $64, %rdi ++ addq $64, %rsi ++ ++ movdqa (%rdi), %xmm1 ++ CMPEQ (%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin) ++ ++ movdqa 16(%rdi), %xmm1 ++ CMPEQ 16(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_16) ++ ++ movdqa 32(%rdi), %xmm1 ++ CMPEQ 32(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_32) ++ ++ movdqa 48(%rdi), %xmm1 ++ CMPEQ 48(%rsi), %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_begin_48) ++ ++ addq $-128, %rdx ++ subq $-64, %rsi ++ subq $-64, %rdi + + cmp $64, %rdx +- jae L(less128bytesin2aligned) ++ ja L(less128bytesin2aligned) + + cmp $32, %rdx +- jb L(less32bytesin128in2aligned) +- +- movdqu (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqu 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin128in2aligned): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +- +- .p2align 4 +-L(256bytesormorein2aligned): +- +- sub $256, %rdx +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- +- movdqa 32(%rdi), %xmm2 +- pxor 32(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(48bytesin256) +- +- movdqa 48(%rdi), %xmm2 +- pxor 48(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(64bytesin256) +- +- movdqa 64(%rdi), %xmm2 +- pxor 64(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(80bytesin256) +- +- movdqa 80(%rdi), %xmm2 +- pxor 80(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(96bytesin256) +- +- movdqa 96(%rdi), %xmm2 +- pxor 96(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(112bytesin256) +- +- movdqa 112(%rdi), %xmm2 +- pxor 112(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(128bytesin256) +- +- movdqa 128(%rdi), %xmm2 +- pxor 128(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(144bytesin256) +- +- movdqa 144(%rdi), %xmm2 +- pxor 144(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(160bytesin256) +- +- movdqa 160(%rdi), %xmm2 +- pxor 160(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(176bytesin256) +- +- movdqa 176(%rdi), %xmm2 +- pxor 176(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(192bytesin256) +- +- movdqa 192(%rdi), %xmm2 +- pxor 192(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(208bytesin256) +- +- movdqa 208(%rdi), %xmm2 +- pxor 208(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(224bytesin256) +- +- movdqa 224(%rdi), %xmm2 +- pxor 224(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(240bytesin256) +- +- movdqa 240(%rdi), %xmm2 +- pxor 240(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(256bytesin256) +- +- add $256, %rsi +- add $256, %rdi +- +- cmp $128, %rdx +- jae L(less256bytesin2alinged) +- +- cmp $64, %rdx +- jae L(less128bytesin2aligned) +- +- cmp $32, %rdx +- jb L(less32bytesin256in2alinged) +- +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(16bytesin256) +- +- movdqa 16(%rdi), %xmm2 +- pxor 16(%rsi), %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(32bytesin256) +- sub $32, %rdx +- add $32, %rdi +- add $32, %rsi +-L(less32bytesin256in2alinged): +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ ja L(aligned_last_64_bytes) ++ ++ movdqu -32(%rdi, %rdx), %xmm0 ++ movdqu -32(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end_16) ++ ++ movdqu -16(%rdi, %rdx), %xmm0 ++ movdqu -16(%rsi, %rdx), %xmm1 ++ CMPEQ %xmm0, %xmm1 ++ pmovmskb %xmm1, %eax ++ incw %ax ++ jnz L(vec_return_end) ++ ret + + .p2align 4 +-L(512bytesormorein2aligned): ++L(aligned_loop): + # ifdef DATA_CACHE_SIZE_HALF + mov $DATA_CACHE_SIZE_HALF, %R8_LP + # else + mov __x86_data_cache_size_half(%rip), %R8_LP + # endif +- mov %r8, %r9 +- shr $1, %r8 +- add %r9, %r8 +- cmp %r8, %rdx +- ja L(L2_L3_cache_aglined) ++ movq %r8, %r9 ++ addq %r8, %r8 ++ addq %r9, %r8 ++ cmpq %r8, %rdx ++ ja L(L2_L3_cache_aligned) + + sub $64, %rdx + .p2align 4 + L(64bytesormore_loopin2aligned): +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 +- +- movdqa 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqa (%rdi), %xmm0 ++ movdqa 16(%rdi), %xmm1 ++ movdqa 32(%rdi), %xmm2 ++ movdqa 48(%rdi), %xmm3 + +- movdqa 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqa 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + add $64, %rsi + add $64, %rdi + sub $64, %rdx +- jae L(64bytesormore_loopin2aligned) +- +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) +-L(L2_L3_cache_aglined): +- sub $64, %rdx ++ ja L(64bytesormore_loopin2aligned) ++ jmp L(loop_tail) + ++L(L2_L3_cache_aligned): ++ subq $64, %rdx + .p2align 4 + L(L2_L3_aligned_128bytes_loop): + prefetchnta 0x1c0(%rdi) + prefetchnta 0x1c0(%rsi) +- movdqa (%rdi), %xmm2 +- pxor (%rsi), %xmm2 +- movdqa %xmm2, %xmm1 +- +- movdqa 16(%rdi), %xmm3 +- pxor 16(%rsi), %xmm3 +- por %xmm3, %xmm1 ++ movdqa (%rdi), %xmm0 ++ movdqa 16(%rdi), %xmm1 ++ movdqa 32(%rdi), %xmm2 ++ movdqa 48(%rdi), %xmm3 + +- movdqa 32(%rdi), %xmm4 +- pxor 32(%rsi), %xmm4 +- por %xmm4, %xmm1 ++ CMPEQ (%rsi), %xmm0 ++ CMPEQ 16(%rsi), %xmm1 ++ CMPEQ 32(%rsi), %xmm2 ++ CMPEQ 48(%rsi), %xmm3 + +- movdqa 48(%rdi), %xmm5 +- pxor 48(%rsi), %xmm5 +- por %xmm5, %xmm1 ++ pand %xmm0, %xmm1 ++ pand %xmm2, %xmm3 ++ pand %xmm1, %xmm3 + +- ptest %xmm1, %xmm0 +- jnc L(64bytesormore_loop_end) +- add $64, %rsi +- add $64, %rdi +- sub $64, %rdx +- jae L(L2_L3_aligned_128bytes_loop) +- +- add $64, %rdx +- add %rdx, %rsi +- add %rdx, %rdi +- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) ++ pmovmskb %xmm3, %eax ++ incw %ax ++ jnz L(64bytesormore_loop_end) + ++ addq $64, %rsi ++ addq $64, %rdi ++ subq $64, %rdx ++ ja L(L2_L3_aligned_128bytes_loop) ++ jmp L(loop_tail) + + .p2align 4 + L(64bytesormore_loop_end): +- add $16, %rdi +- add $16, %rsi +- ptest %xmm2, %xmm0 +- jnc L(16bytes) +- +- add $16, %rdi +- add $16, %rsi +- ptest %xmm3, %xmm0 +- jnc L(16bytes) +- +- add $16, %rdi +- add $16, %rsi +- ptest %xmm4, %xmm0 +- jnc L(16bytes) +- +- add $16, %rdi +- add $16, %rsi +- jmp L(16bytes) +- +-L(256bytesin256): +- add $256, %rdi +- add $256, %rsi +- jmp L(16bytes) +-L(240bytesin256): +- add $240, %rdi +- add $240, %rsi +- jmp L(16bytes) +-L(224bytesin256): +- add $224, %rdi +- add $224, %rsi +- jmp L(16bytes) +-L(208bytesin256): +- add $208, %rdi +- add $208, %rsi +- jmp L(16bytes) +-L(192bytesin256): +- add $192, %rdi +- add $192, %rsi +- jmp L(16bytes) +-L(176bytesin256): +- add $176, %rdi +- add $176, %rsi +- jmp L(16bytes) +-L(160bytesin256): +- add $160, %rdi +- add $160, %rsi +- jmp L(16bytes) +-L(144bytesin256): +- add $144, %rdi +- add $144, %rsi +- jmp L(16bytes) +-L(128bytesin256): +- add $128, %rdi +- add $128, %rsi +- jmp L(16bytes) +-L(112bytesin256): +- add $112, %rdi +- add $112, %rsi +- jmp L(16bytes) +-L(96bytesin256): +- add $96, %rdi +- add $96, %rsi +- jmp L(16bytes) +-L(80bytesin256): +- add $80, %rdi +- add $80, %rsi +- jmp L(16bytes) +-L(64bytesin256): +- add $64, %rdi +- add $64, %rsi +- jmp L(16bytes) +-L(48bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(32bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(16bytesin256): +- add $16, %rdi +- add $16, %rsi +-L(16bytes): +- mov -16(%rdi), %rax +- mov -16(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(8bytes): +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(12bytes): +- mov -12(%rdi), %rax +- mov -12(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(4bytes): +- mov -4(%rsi), %ecx +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +-L(0bytes): +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal case for wmemcmp */ +- .p2align 4 +-L(65bytes): +- movdqu -65(%rdi), %xmm1 +- movdqu -65(%rsi), %xmm2 +- mov $-65, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(49bytes): +- movdqu -49(%rdi), %xmm1 +- movdqu -49(%rsi), %xmm2 +- mov $-49, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(33bytes): +- movdqu -33(%rdi), %xmm1 +- movdqu -33(%rsi), %xmm2 +- mov $-33, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(17bytes): +- mov -17(%rdi), %rax +- mov -17(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(9bytes): +- mov -9(%rdi), %rax +- mov -9(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %edx +- sub %edx, %eax +- ret +- +- .p2align 4 +-L(13bytes): +- mov -13(%rdi), %rax +- mov -13(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(5bytes): +- mov -5(%rdi), %eax +- mov -5(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %edx +- sub %edx, %eax +- ret +- +- .p2align 4 +-L(66bytes): +- movdqu -66(%rdi), %xmm1 +- movdqu -66(%rsi), %xmm2 +- mov $-66, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(50bytes): +- movdqu -50(%rdi), %xmm1 +- movdqu -50(%rsi), %xmm2 +- mov $-50, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(34bytes): +- movdqu -34(%rdi), %xmm1 +- movdqu -34(%rsi), %xmm2 +- mov $-34, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(18bytes): +- mov -18(%rdi), %rax +- mov -18(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(10bytes): +- mov -10(%rdi), %rax +- mov -10(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzwl -2(%rdi), %eax +- movzwl -2(%rsi), %ecx +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(14bytes): +- mov -14(%rdi), %rax +- mov -14(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(6bytes): +- mov -6(%rdi), %eax +- mov -6(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +-L(2bytes): +- movzwl -2(%rsi), %ecx +- movzwl -2(%rdi), %eax +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(67bytes): +- movdqu -67(%rdi), %xmm2 +- movdqu -67(%rsi), %xmm1 +- mov $-67, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(51bytes): +- movdqu -51(%rdi), %xmm2 +- movdqu -51(%rsi), %xmm1 +- mov $-51, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(35bytes): +- movdqu -35(%rsi), %xmm1 +- movdqu -35(%rdi), %xmm2 +- mov $-35, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(19bytes): +- mov -19(%rdi), %rax +- mov -19(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +-L(11bytes): +- mov -11(%rdi), %rax +- mov -11(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(15bytes): +- mov -15(%rdi), %rax +- mov -15(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(7bytes): +- mov -7(%rdi), %eax +- mov -7(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(3bytes): +- movzwl -3(%rdi), %eax +- movzwl -3(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin2bytes) +-L(1bytes): +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %ecx +- sub %ecx, %eax +- ret +-# endif +- +- .p2align 4 +-L(68bytes): +- movdqu -68(%rdi), %xmm2 +- movdqu -68(%rsi), %xmm1 +- mov $-68, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(52bytes): +- movdqu -52(%rdi), %xmm2 +- movdqu -52(%rsi), %xmm1 +- mov $-52, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(36bytes): +- movdqu -36(%rdi), %xmm2 +- movdqu -36(%rsi), %xmm1 +- mov $-36, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(20bytes): +- movdqu -20(%rdi), %xmm2 +- movdqu -20(%rsi), %xmm1 +- mov $-20, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -4(%rsi), %ecx +- +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(69bytes): +- movdqu -69(%rsi), %xmm1 +- movdqu -69(%rdi), %xmm2 +- mov $-69, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(53bytes): +- movdqu -53(%rsi), %xmm1 +- movdqu -53(%rdi), %xmm2 +- mov $-53, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(37bytes): +- movdqu -37(%rsi), %xmm1 +- movdqu -37(%rdi), %xmm2 +- mov $-37, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(21bytes): +- movdqu -21(%rsi), %xmm1 +- movdqu -21(%rdi), %xmm2 +- mov $-21, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(70bytes): +- movdqu -70(%rsi), %xmm1 +- movdqu -70(%rdi), %xmm2 +- mov $-70, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(54bytes): +- movdqu -54(%rsi), %xmm1 +- movdqu -54(%rdi), %xmm2 +- mov $-54, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(38bytes): +- movdqu -38(%rsi), %xmm1 +- movdqu -38(%rdi), %xmm2 +- mov $-38, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(22bytes): +- movdqu -22(%rsi), %xmm1 +- movdqu -22(%rdi), %xmm2 +- mov $-22, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(71bytes): +- movdqu -71(%rsi), %xmm1 +- movdqu -71(%rdi), %xmm2 +- mov $-71, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(55bytes): +- movdqu -55(%rdi), %xmm2 +- movdqu -55(%rsi), %xmm1 +- mov $-55, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(39bytes): +- movdqu -39(%rdi), %xmm2 +- movdqu -39(%rsi), %xmm1 +- mov $-39, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(23bytes): +- movdqu -23(%rdi), %xmm2 +- movdqu -23(%rsi), %xmm1 +- mov $-23, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +-# endif +- +- .p2align 4 +-L(72bytes): +- movdqu -72(%rsi), %xmm1 +- movdqu -72(%rdi), %xmm2 +- mov $-72, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(56bytes): +- movdqu -56(%rdi), %xmm2 +- movdqu -56(%rsi), %xmm1 +- mov $-56, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(40bytes): +- movdqu -40(%rdi), %xmm2 +- movdqu -40(%rsi), %xmm1 +- mov $-40, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(24bytes): +- movdqu -24(%rdi), %xmm2 +- movdqu -24(%rsi), %xmm1 +- mov $-24, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -8(%rsi), %rcx +- mov -8(%rdi), %rax +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(73bytes): +- movdqu -73(%rsi), %xmm1 +- movdqu -73(%rdi), %xmm2 +- mov $-73, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(57bytes): +- movdqu -57(%rdi), %xmm2 +- movdqu -57(%rsi), %xmm1 +- mov $-57, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(41bytes): +- movdqu -41(%rdi), %xmm2 +- movdqu -41(%rsi), %xmm1 +- mov $-41, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(25bytes): +- movdqu -25(%rdi), %xmm2 +- movdqu -25(%rsi), %xmm1 +- mov $-25, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -9(%rdi), %rax +- mov -9(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzbl -1(%rdi), %eax +- movzbl -1(%rsi), %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(74bytes): +- movdqu -74(%rsi), %xmm1 +- movdqu -74(%rdi), %xmm2 +- mov $-74, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(58bytes): +- movdqu -58(%rdi), %xmm2 +- movdqu -58(%rsi), %xmm1 +- mov $-58, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(42bytes): +- movdqu -42(%rdi), %xmm2 +- movdqu -42(%rsi), %xmm1 +- mov $-42, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(26bytes): +- movdqu -26(%rdi), %xmm2 +- movdqu -26(%rsi), %xmm1 +- mov $-26, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -10(%rdi), %rax +- mov -10(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- movzwl -2(%rdi), %eax +- movzwl -2(%rsi), %ecx +- jmp L(diffin2bytes) +- +- .p2align 4 +-L(75bytes): +- movdqu -75(%rsi), %xmm1 +- movdqu -75(%rdi), %xmm2 +- mov $-75, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(59bytes): +- movdqu -59(%rdi), %xmm2 +- movdqu -59(%rsi), %xmm1 +- mov $-59, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(43bytes): +- movdqu -43(%rdi), %xmm2 +- movdqu -43(%rsi), %xmm1 +- mov $-43, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(27bytes): +- movdqu -27(%rdi), %xmm2 +- movdqu -27(%rsi), %xmm1 +- mov $-27, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -11(%rdi), %rax +- mov -11(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rdi), %eax +- mov -4(%rsi), %ecx +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +-# endif +- .p2align 4 +-L(76bytes): +- movdqu -76(%rsi), %xmm1 +- movdqu -76(%rdi), %xmm2 +- mov $-76, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(60bytes): +- movdqu -60(%rdi), %xmm2 +- movdqu -60(%rsi), %xmm1 +- mov $-60, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(44bytes): +- movdqu -44(%rdi), %xmm2 +- movdqu -44(%rsi), %xmm1 +- mov $-44, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(28bytes): +- movdqu -28(%rdi), %xmm2 +- movdqu -28(%rsi), %xmm1 +- mov $-28, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -12(%rdi), %rax +- mov -12(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -4(%rsi), %ecx +-# ifndef USE_AS_WMEMCMP +- mov -4(%rdi), %eax +- cmp %eax, %ecx +-# else +- cmp -4(%rdi), %ecx +-# endif +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +- +-# ifndef USE_AS_WMEMCMP +-/* unreal cases for wmemcmp */ +- .p2align 4 +-L(77bytes): +- movdqu -77(%rsi), %xmm1 +- movdqu -77(%rdi), %xmm2 +- mov $-77, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(61bytes): +- movdqu -61(%rdi), %xmm2 +- movdqu -61(%rsi), %xmm1 +- mov $-61, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(45bytes): +- movdqu -45(%rdi), %xmm2 +- movdqu -45(%rsi), %xmm1 +- mov $-45, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(29bytes): +- movdqu -29(%rdi), %xmm2 +- movdqu -29(%rsi), %xmm1 +- mov $-29, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -13(%rdi), %rax +- mov -13(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(78bytes): +- movdqu -78(%rsi), %xmm1 +- movdqu -78(%rdi), %xmm2 +- mov $-78, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(62bytes): +- movdqu -62(%rdi), %xmm2 +- movdqu -62(%rsi), %xmm1 +- mov $-62, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(46bytes): +- movdqu -46(%rdi), %xmm2 +- movdqu -46(%rsi), %xmm1 +- mov $-46, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(30bytes): +- movdqu -30(%rdi), %xmm2 +- movdqu -30(%rsi), %xmm1 +- mov $-30, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -14(%rdi), %rax +- mov -14(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +- .p2align 4 +-L(79bytes): +- movdqu -79(%rsi), %xmm1 +- movdqu -79(%rdi), %xmm2 +- mov $-79, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(63bytes): +- movdqu -63(%rdi), %xmm2 +- movdqu -63(%rsi), %xmm1 +- mov $-63, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(47bytes): +- movdqu -47(%rdi), %xmm2 +- movdqu -47(%rsi), %xmm1 +- mov $-47, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(31bytes): +- movdqu -31(%rdi), %xmm2 +- movdqu -31(%rsi), %xmm1 +- mov $-31, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- mov -15(%rdi), %rax +- mov -15(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +-# endif +- .p2align 4 +-L(64bytes): +- movdqu -64(%rdi), %xmm2 +- movdqu -64(%rsi), %xmm1 +- mov $-64, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(48bytes): +- movdqu -48(%rdi), %xmm2 +- movdqu -48(%rsi), %xmm1 +- mov $-48, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +-L(32bytes): +- movdqu -32(%rdi), %xmm2 +- movdqu -32(%rsi), %xmm1 +- mov $-32, %dl +- pxor %xmm1, %xmm2 +- ptest %xmm2, %xmm0 +- jnc L(less16bytes) +- +- mov -16(%rdi), %rax +- mov -16(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- +- mov -8(%rdi), %rax +- mov -8(%rsi), %rcx +- cmp %rax, %rcx +- jne L(diffin8bytes) +- xor %eax, %eax +- ret +- +-/* +- * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. +- */ +- .p2align 3 +-L(less16bytes): +- movsbq %dl, %rdx +- mov (%rsi, %rdx), %rcx +- mov (%rdi, %rdx), %rax +- cmp %rax, %rcx +- jne L(diffin8bytes) +- mov 8(%rsi, %rdx), %rcx +- mov 8(%rdi, %rdx), %rax +-L(diffin8bytes): +- cmp %eax, %ecx +- jne L(diffin4bytes) +- shr $32, %rcx +- shr $32, %rax +- ++ pmovmskb %xmm0, %ecx ++ incw %cx ++ jnz L(loop_end_ret) ++ ++ pmovmskb %xmm1, %ecx ++ notw %cx ++ sall $16, %ecx ++ jnz L(loop_end_ret) ++ ++ pmovmskb %xmm2, %ecx ++ notw %cx ++ shlq $32, %rcx ++ jnz L(loop_end_ret) ++ ++ addq $48, %rdi ++ addq $48, %rsi ++ movq %rax, %rcx ++ ++ .p2align 4,, 6 ++L(loop_end_ret): ++ bsfq %rcx, %rcx + # ifdef USE_AS_WMEMCMP +-/* for wmemcmp */ +- cmp %eax, %ecx +- jne L(diffin4bytes) +- xor %eax, %eax +- ret +-# endif +- +-L(diffin4bytes): +-# ifndef USE_AS_WMEMCMP +- cmp %cx, %ax +- jne L(diffin2bytes) +- shr $16, %ecx +- shr $16, %eax +-L(diffin2bytes): +- cmp %cl, %al +- jne L(end) +- and $0xffff, %eax +- and $0xffff, %ecx +- sub %ecx, %eax +- ret +- +- .p2align 4 +-L(end): +- and $0xff, %eax +- and $0xff, %ecx +- sub %ecx, %eax +- ret ++ movl (%rdi, %rcx), %eax ++ xorl %edx, %edx ++ cmpl (%rsi, %rcx), %eax ++ setg %dl ++ leal -1(%rdx, %rdx), %eax + # else +- +-/* for wmemcmp */ +- mov $1, %eax +- jl L(nequal_bigger) +- neg %eax +- ret +- +- .p2align 4 +-L(nequal_bigger): +- ret +- +-L(unreal_case): +- xor %eax, %eax +- ret ++ movzbl (%rdi, %rcx), %eax ++ movzbl (%rsi, %rcx), %ecx ++ subl %ecx, %eax + # endif +- ++ ret + END (MEMCMP) +- +- .section .rodata.sse4.1,"a",@progbits +- .p2align 3 +-# ifndef USE_AS_WMEMCMP +-L(table_64bytes): +- .int JMPTBL (L(0bytes), L(table_64bytes)) +- .int JMPTBL (L(1bytes), L(table_64bytes)) +- .int JMPTBL (L(2bytes), L(table_64bytes)) +- .int JMPTBL (L(3bytes), L(table_64bytes)) +- .int JMPTBL (L(4bytes), L(table_64bytes)) +- .int JMPTBL (L(5bytes), L(table_64bytes)) +- .int JMPTBL (L(6bytes), L(table_64bytes)) +- .int JMPTBL (L(7bytes), L(table_64bytes)) +- .int JMPTBL (L(8bytes), L(table_64bytes)) +- .int JMPTBL (L(9bytes), L(table_64bytes)) +- .int JMPTBL (L(10bytes), L(table_64bytes)) +- .int JMPTBL (L(11bytes), L(table_64bytes)) +- .int JMPTBL (L(12bytes), L(table_64bytes)) +- .int JMPTBL (L(13bytes), L(table_64bytes)) +- .int JMPTBL (L(14bytes), L(table_64bytes)) +- .int JMPTBL (L(15bytes), L(table_64bytes)) +- .int JMPTBL (L(16bytes), L(table_64bytes)) +- .int JMPTBL (L(17bytes), L(table_64bytes)) +- .int JMPTBL (L(18bytes), L(table_64bytes)) +- .int JMPTBL (L(19bytes), L(table_64bytes)) +- .int JMPTBL (L(20bytes), L(table_64bytes)) +- .int JMPTBL (L(21bytes), L(table_64bytes)) +- .int JMPTBL (L(22bytes), L(table_64bytes)) +- .int JMPTBL (L(23bytes), L(table_64bytes)) +- .int JMPTBL (L(24bytes), L(table_64bytes)) +- .int JMPTBL (L(25bytes), L(table_64bytes)) +- .int JMPTBL (L(26bytes), L(table_64bytes)) +- .int JMPTBL (L(27bytes), L(table_64bytes)) +- .int JMPTBL (L(28bytes), L(table_64bytes)) +- .int JMPTBL (L(29bytes), L(table_64bytes)) +- .int JMPTBL (L(30bytes), L(table_64bytes)) +- .int JMPTBL (L(31bytes), L(table_64bytes)) +- .int JMPTBL (L(32bytes), L(table_64bytes)) +- .int JMPTBL (L(33bytes), L(table_64bytes)) +- .int JMPTBL (L(34bytes), L(table_64bytes)) +- .int JMPTBL (L(35bytes), L(table_64bytes)) +- .int JMPTBL (L(36bytes), L(table_64bytes)) +- .int JMPTBL (L(37bytes), L(table_64bytes)) +- .int JMPTBL (L(38bytes), L(table_64bytes)) +- .int JMPTBL (L(39bytes), L(table_64bytes)) +- .int JMPTBL (L(40bytes), L(table_64bytes)) +- .int JMPTBL (L(41bytes), L(table_64bytes)) +- .int JMPTBL (L(42bytes), L(table_64bytes)) +- .int JMPTBL (L(43bytes), L(table_64bytes)) +- .int JMPTBL (L(44bytes), L(table_64bytes)) +- .int JMPTBL (L(45bytes), L(table_64bytes)) +- .int JMPTBL (L(46bytes), L(table_64bytes)) +- .int JMPTBL (L(47bytes), L(table_64bytes)) +- .int JMPTBL (L(48bytes), L(table_64bytes)) +- .int JMPTBL (L(49bytes), L(table_64bytes)) +- .int JMPTBL (L(50bytes), L(table_64bytes)) +- .int JMPTBL (L(51bytes), L(table_64bytes)) +- .int JMPTBL (L(52bytes), L(table_64bytes)) +- .int JMPTBL (L(53bytes), L(table_64bytes)) +- .int JMPTBL (L(54bytes), L(table_64bytes)) +- .int JMPTBL (L(55bytes), L(table_64bytes)) +- .int JMPTBL (L(56bytes), L(table_64bytes)) +- .int JMPTBL (L(57bytes), L(table_64bytes)) +- .int JMPTBL (L(58bytes), L(table_64bytes)) +- .int JMPTBL (L(59bytes), L(table_64bytes)) +- .int JMPTBL (L(60bytes), L(table_64bytes)) +- .int JMPTBL (L(61bytes), L(table_64bytes)) +- .int JMPTBL (L(62bytes), L(table_64bytes)) +- .int JMPTBL (L(63bytes), L(table_64bytes)) +- .int JMPTBL (L(64bytes), L(table_64bytes)) +- .int JMPTBL (L(65bytes), L(table_64bytes)) +- .int JMPTBL (L(66bytes), L(table_64bytes)) +- .int JMPTBL (L(67bytes), L(table_64bytes)) +- .int JMPTBL (L(68bytes), L(table_64bytes)) +- .int JMPTBL (L(69bytes), L(table_64bytes)) +- .int JMPTBL (L(70bytes), L(table_64bytes)) +- .int JMPTBL (L(71bytes), L(table_64bytes)) +- .int JMPTBL (L(72bytes), L(table_64bytes)) +- .int JMPTBL (L(73bytes), L(table_64bytes)) +- .int JMPTBL (L(74bytes), L(table_64bytes)) +- .int JMPTBL (L(75bytes), L(table_64bytes)) +- .int JMPTBL (L(76bytes), L(table_64bytes)) +- .int JMPTBL (L(77bytes), L(table_64bytes)) +- .int JMPTBL (L(78bytes), L(table_64bytes)) +- .int JMPTBL (L(79bytes), L(table_64bytes)) +-# else +-L(table_64bytes): +- .int JMPTBL (L(0bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(4bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(8bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(12bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(16bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(20bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(24bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(28bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(32bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(36bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(40bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(44bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(48bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(52bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(56bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(60bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(64bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(68bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(72bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(76bytes), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +- .int JMPTBL (L(unreal_case), L(table_64bytes)) +-# endif + #endif diff --git a/glibc-upstream-2.34-184.patch b/glibc-upstream-2.34-184.patch new file mode 100644 index 0000000..805f91e --- /dev/null +++ b/glibc-upstream-2.34-184.patch @@ -0,0 +1,104 @@ +commit 4bbd0f866ad0ff197f72346f776ebee9b7e1a706 +Author: Noah Goldstein +Date: Fri Dec 3 15:29:25 2021 -0800 + + x86-64: Use notl in EVEX strcmp [BZ #28646] + + Must use notl %edi here as lower bits are for CHAR comparisons + potentially out of range thus can be 0 without indicating mismatch. + This fixes BZ #28646. + + Co-Authored-By: H.J. Lu + (cherry picked from commit 4df1fa6ddc8925a75f3da644d5da3bb16eb33f02) + +diff --git a/string/test-strcmp.c b/string/test-strcmp.c +index 7feababf4ddc5603..a0255b9625fbcedd 100644 +--- a/string/test-strcmp.c ++++ b/string/test-strcmp.c +@@ -25,6 +25,7 @@ + # define TEST_NAME "strcmp" + #endif + #include "test-string.h" ++#include + + #ifdef WIDE + # include +@@ -392,6 +393,32 @@ check2 (void) + } + } + ++static void ++check3 (void) ++{ ++ size_t size = 0xd000 + 0x4000; ++ CHAR *s1, *s2; ++ CHAR *buffer1 = mmap (NULL, size, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANON, -1, 0); ++ CHAR *buffer2 = mmap (NULL, size, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANON, -1, 0); ++ if (buffer1 == MAP_FAILED || buffer1 == MAP_FAILED) ++ error (EXIT_UNSUPPORTED, errno, "mmap failed"); ++ ++ s1 = (CHAR *) (buffer1 + 0x8f8 / sizeof (CHAR)); ++ s2 = (CHAR *) (buffer2 + 0xcff3 / sizeof (CHAR)); ++ ++ STRCPY(s1, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/PathDocFileFactory.java")); ++ STRCPY(s2, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/taglets/ThrowsTaglet.java")); ++ ++ int exp_result = SIMPLE_STRCMP (s1, s2); ++ FOR_EACH_IMPL (impl, 0) ++ check_result (impl, s1, s2, exp_result); ++ ++ munmap ((void *) buffer1, size); ++ munmap ((void *) buffer2, size); ++} ++ + int + test_main (void) + { +@@ -400,6 +427,7 @@ test_main (void) + test_init (); + check(); + check2 (); ++ check3 (); + + printf ("%23s", ""); + FOR_EACH_IMPL (impl, 0) +diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S +index 82f12ac89bcae20b..6f5c4bf984da2b80 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-evex.S ++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S +@@ -656,12 +656,13 @@ L(loop_cross_page): + in YMM3 and 32 bytes at VEC_SIZE(%rdx, %r10). */ + VPCMP $0, VEC_SIZE(%rdx, %r10), %YMM3, %k3{%k4} + kmovd %k3, %edi ++ /* Must use notl %edi here as lower bits are for CHAR ++ comparisons potentially out of range thus can be 0 without ++ indicating mismatch. */ ++ notl %edi + # ifdef USE_AS_WCSCMP + /* Don't use subl since it is the upper 8 bits of EDI below. */ +- notl %edi + andl $0xff, %edi +-# else +- incl %edi + # endif + + # ifdef USE_AS_WCSCMP +@@ -743,12 +744,13 @@ L(loop_cross_page_2_vec): + in YMM1 and 32 bytes at (VEC_SIZE * 3)(%rdx, %r10). */ + VPCMP $0, (VEC_SIZE * 3)(%rdx, %r10), %YMM1, %k3{%k4} + kmovd %k3, %edi ++ /* Must use notl %edi here as lower bits are for CHAR ++ comparisons potentially out of range thus can be 0 without ++ indicating mismatch. */ ++ notl %edi + # ifdef USE_AS_WCSCMP + /* Don't use subl since it is the upper 8 bits of EDI below. */ +- notl %edi + andl $0xff, %edi +-# else +- incl %edi + # endif + + # ifdef USE_AS_WCSCMP diff --git a/glibc-upstream-2.34-185.patch b/glibc-upstream-2.34-185.patch new file mode 100644 index 0000000..f06f86f --- /dev/null +++ b/glibc-upstream-2.34-185.patch @@ -0,0 +1,30 @@ +commit f3a99b2216114f89b20329ae7664b764248b4bbd +Author: H.J. Lu +Date: Mon Dec 6 07:14:12 2021 -0800 + + x86: Don't set Prefer_No_AVX512 for processors with AVX512 and AVX-VNNI + + Don't set Prefer_No_AVX512 on processors with AVX512 and AVX-VNNI since + they won't lower CPU frequency when ZMM load and store instructions are + used. + + (cherry picked from commit ceeffe968c01b1202e482f4855cb6baf5c6cb713) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index f4d4049e391cbabd..09590d8794b1c6fb 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -566,8 +566,11 @@ disable_tsx: + |= bit_arch_Prefer_No_VZEROUPPER; + else + { +- cpu_features->preferred[index_arch_Prefer_No_AVX512] +- |= bit_arch_Prefer_No_AVX512; ++ /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency ++ when ZMM load and store instructions are used. */ ++ if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI)) ++ cpu_features->preferred[index_arch_Prefer_No_AVX512] ++ |= bit_arch_Prefer_No_AVX512; + + /* Avoid RTM abort triggered by VZEROUPPER inside a + transactionally executing RTM region. */ diff --git a/glibc-upstream-2.34-186.patch b/glibc-upstream-2.34-186.patch new file mode 100644 index 0000000..a046844 --- /dev/null +++ b/glibc-upstream-2.34-186.patch @@ -0,0 +1,384 @@ +commit c796418d00f65c8c5fbed477f3ba6da2bee64ece +Author: Noah Goldstein +Date: Fri Dec 24 18:54:41 2021 -0600 + + x86: Optimize L(less_vec) case in memcmp-evex-movbe.S + + No bug. + Optimizations are twofold. + + 1) Replace page cross and 0/1 checks with masked load instructions in + L(less_vec). In applications this reduces branch-misses in the + hot [0, 32] case. + 2) Change controlflow so that L(less_vec) case gets the fall through. + + Change 2) helps copies in the [0, 32] size range but comes at the cost + of copies in the [33, 64] size range. From profiles of GCC and + Python3, 94%+ and 99%+ of calls are in the [0, 32] range so this + appears to the the right tradeoff. + + Signed-off-by: Noah Goldstein + Reviewed-by: H.J. Lu + (cherry picked from commit abddd61de090ae84e380aff68a98bd94ef704667) + +diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +index 640f6757fac8a356..d2899e7c7078cd41 100644 +--- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S ++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +@@ -62,15 +62,18 @@ Latency: + # define VMOVU vmovdqu64 + + # ifdef USE_AS_WMEMCMP ++# define VMOVU_MASK vmovdqu32 + # define CHAR_SIZE 4 + # define VPCMP vpcmpd + # define VPTEST vptestmd + # else ++# define VMOVU_MASK vmovdqu8 + # define CHAR_SIZE 1 + # define VPCMP vpcmpub + # define VPTEST vptestmb + # endif + ++ + # define VEC_SIZE 32 + # define PAGE_SIZE 4096 + # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) +@@ -102,12 +105,48 @@ ENTRY_P2ALIGN (MEMCMP, 6) + movl %edx, %edx + # endif + cmp $CHAR_PER_VEC, %RDX_LP +- jb L(less_vec) ++ /* Fall through for [0, VEC_SIZE] as its the hottest. */ ++ ja L(more_1x_vec) ++ ++ /* Create mask for CHAR's we want to compare. This allows us to ++ avoid having to include page cross logic. */ ++ movl $-1, %ecx ++ bzhil %edx, %ecx, %ecx ++ kmovd %ecx, %k2 ++ ++ /* Safe to load full ymm with mask. */ ++ VMOVU_MASK (%rsi), %YMM2{%k2} ++ VPCMP $4,(%rdi), %YMM2, %k1{%k2} ++ kmovd %k1, %eax ++ testl %eax, %eax ++ jnz L(return_vec_0) ++ ret + ++ .p2align 4 ++L(return_vec_0): ++ tzcntl %eax, %eax ++# ifdef USE_AS_WMEMCMP ++ movl (%rdi, %rax, CHAR_SIZE), %ecx ++ xorl %edx, %edx ++ cmpl (%rsi, %rax, CHAR_SIZE), %ecx ++ /* NB: no partial register stall here because xorl zero idiom ++ above. */ ++ setg %dl ++ leal -1(%rdx, %rdx), %eax ++# else ++ movzbl (%rsi, %rax), %ecx ++ movzbl (%rdi, %rax), %eax ++ subl %ecx, %eax ++# endif ++ ret ++ ++ ++ .p2align 4 ++L(more_1x_vec): + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM1 + /* Use compare not equals to directly check for mismatch. */ +- VPCMP $4, (%rdi), %YMM1, %k1 ++ VPCMP $4,(%rdi), %YMM1, %k1 + kmovd %k1, %eax + /* NB: eax must be destination register if going to + L(return_vec_[0,2]). For L(return_vec_3) destination register +@@ -131,13 +170,13 @@ ENTRY_P2ALIGN (MEMCMP, 6) + + /* Check third and fourth VEC no matter what. */ + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 +- VPCMP $4, (VEC_SIZE * 2)(%rdi), %YMM3, %k1 ++ VPCMP $4,(VEC_SIZE * 2)(%rdi), %YMM3, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(return_vec_2) + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 +- VPCMP $4, (VEC_SIZE * 3)(%rdi), %YMM4, %k1 ++ VPCMP $4,(VEC_SIZE * 3)(%rdi), %YMM4, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jnz L(return_vec_3) +@@ -169,7 +208,7 @@ ENTRY_P2ALIGN (MEMCMP, 6) + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + /* Ternary logic to xor (VEC_SIZE * 3)(%rdi) with YMM4 while + oring with YMM1. Result is stored in YMM4. */ +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 + + /* Or together YMM2, YMM3, and YMM4 into YMM4. */ + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 +@@ -184,7 +223,8 @@ ENTRY_P2ALIGN (MEMCMP, 6) + /* NB: eax must be zero to reach here. */ + ret + +- .p2align 4 ++ ++ .p2align 4,, 8 + L(8x_end_return_vec_0_1_2_3): + movq %rdx, %rdi + L(8x_return_vec_0_1_2_3): +@@ -222,23 +262,6 @@ L(return_vec_3): + # endif + ret + +- .p2align 4 +-L(return_vec_0): +- tzcntl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rax, CHAR_SIZE), %ecx +- xorl %edx, %edx +- cmpl (%rsi, %rax, CHAR_SIZE), %ecx +- /* NB: no partial register stall here because xorl zero idiom +- above. */ +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rsi, %rax), %ecx +- movzbl (%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret + + .p2align 4 + L(return_vec_1): +@@ -297,7 +320,7 @@ L(loop_4x_vec): + VMOVU (VEC_SIZE * 2)(%rsi, %rdi), %YMM3 + vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3 + VMOVU (VEC_SIZE * 3)(%rsi, %rdi), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4 + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 + VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +@@ -324,7 +347,7 @@ L(loop_4x_vec): + VMOVU VEC_SIZE(%rsi, %rdx), %YMM2 + vpxorq VEC_SIZE(%rdx), %YMM2, %YMM2 + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM4 +- vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 ++ vpternlogd $0xde,(VEC_SIZE * 3)(%rdx), %YMM1, %YMM4 + vpternlogd $0xfe, %YMM2, %YMM3, %YMM4 + VPTEST %YMM4, %YMM4, %k1 + kmovd %k1, %ecx +@@ -336,14 +359,14 @@ L(loop_4x_vec): + /* Only entry is from L(more_8x_vec). */ + .p2align 4,, 10 + L(8x_last_2x_vec): +- VPCMP $4, (VEC_SIZE * 2)(%rdx), %YMM3, %k1 ++ VPCMP $4,(VEC_SIZE * 2)(%rdx), %YMM3, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(8x_return_vec_2) + /* Naturally aligned to 16 bytes. */ + L(8x_last_1x_vec): + VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM1 +- VPCMP $4, (VEC_SIZE * 3)(%rdx), %YMM1, %k1 ++ VPCMP $4,(VEC_SIZE * 3)(%rdx), %YMM1, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(8x_return_vec_3) +@@ -392,7 +415,9 @@ L(last_1x_vec): + jnz L(return_vec_0_end) + ret + +- .p2align 4,, 10 ++ ++ /* Don't align. Takes 2-fetch blocks either way and aligning ++ will cause code to spill into another cacheline. */ + L(return_vec_1_end): + /* Use bsf to save code size. This is necessary to have + L(one_or_less) fit in aligning bytes between. */ +@@ -411,31 +436,8 @@ L(return_vec_1_end): + # endif + ret + +- /* NB: L(one_or_less) fits in alignment padding between +- L(return_vec_1_end) and L(return_vec_0_end). */ +-# ifdef USE_AS_WMEMCMP +-L(one_or_less): +- jb L(zero) +- movl (%rdi), %ecx +- xorl %edx, %edx +- cmpl (%rsi), %ecx +- je L(zero) +- setg %dl +- leal -1(%rdx, %rdx), %eax +- ret +-# else +-L(one_or_less): +- jb L(zero) +- movzbl (%rsi), %ecx +- movzbl (%rdi), %eax +- subl %ecx, %eax +- ret +-# endif +-L(zero): +- xorl %eax, %eax +- ret +- +- .p2align 4 ++ /* Don't align. Takes 2-fetch blocks either way and aligning ++ will cause code to spill into another cacheline. */ + L(return_vec_0_end): + tzcntl %eax, %eax + addl %edx, %eax +@@ -451,146 +453,7 @@ L(return_vec_0_end): + subl %ecx, %eax + # endif + ret ++ /* 1-byte until next cache line. */ + +- .p2align 4 +-L(less_vec): +- /* Check if one or less CHAR. This is necessary for size == 0 +- but is also faster for size == CHAR_SIZE. */ +- cmpl $1, %edx +- jbe L(one_or_less) +- +- /* Check if loading one VEC from either s1 or s2 could cause a +- page cross. This can have false positives but is by far the +- fastest method. */ +- movl %edi, %eax +- orl %esi, %eax +- andl $(PAGE_SIZE - 1), %eax +- cmpl $(PAGE_SIZE - VEC_SIZE), %eax +- jg L(page_cross_less_vec) +- +- /* No page cross possible. */ +- VMOVU (%rsi), %YMM2 +- VPCMP $4, (%rdi), %YMM2, %k1 +- kmovd %k1, %eax +- /* Check if any matches where in bounds. Intentionally not +- storing result in eax to limit dependency chain if it goes to +- L(return_vec_0_lv). */ +- bzhil %edx, %eax, %edx +- jnz L(return_vec_0_lv) +- xorl %eax, %eax +- ret +- +- /* Essentially duplicate of L(return_vec_0). Ends up not costing +- any code as shrinks L(less_vec) by allowing 2-byte encoding of +- the jump and ends up fitting in aligning bytes. As well fits on +- same cache line as L(less_vec) so also saves a line from having +- to be fetched on cold calls to memcmp. */ +- .p2align 4,, 4 +-L(return_vec_0_lv): +- tzcntl %eax, %eax +-# ifdef USE_AS_WMEMCMP +- movl (%rdi, %rax, CHAR_SIZE), %ecx +- xorl %edx, %edx +- cmpl (%rsi, %rax, CHAR_SIZE), %ecx +- /* NB: no partial register stall here because xorl zero idiom +- above. */ +- setg %dl +- leal -1(%rdx, %rdx), %eax +-# else +- movzbl (%rsi, %rax), %ecx +- movzbl (%rdi, %rax), %eax +- subl %ecx, %eax +-# endif +- ret +- +- .p2align 4 +-L(page_cross_less_vec): +- /* if USE_AS_WMEMCMP it can only be 0, 4, 8, 12, 16, 20, 24, 28 +- bytes. */ +- cmpl $(16 / CHAR_SIZE), %edx +- jae L(between_16_31) +-# ifndef USE_AS_WMEMCMP +- cmpl $8, %edx +- jae L(between_8_15) +- cmpl $4, %edx +- jb L(between_2_3) +- +- /* Load as big endian with overlapping movbe to avoid branches. +- */ +- movbe (%rdi), %eax +- movbe (%rsi), %ecx +- shlq $32, %rax +- shlq $32, %rcx +- movbe -4(%rdi, %rdx), %edi +- movbe -4(%rsi, %rdx), %esi +- orq %rdi, %rax +- orq %rsi, %rcx +- subq %rcx, %rax +- /* edx is guranteed to be positive int32 in range [4, 7]. */ +- cmovne %edx, %eax +- /* ecx is -1 if rcx > rax. Otherwise 0. */ +- sbbl %ecx, %ecx +- /* If rcx > rax, then ecx is 0 and eax is positive. If rcx == +- rax then eax and ecx are zero. If rax < rax then ecx is -1 so +- eax doesn't matter. */ +- orl %ecx, %eax +- ret +- +- .p2align 4,, 8 +-L(between_8_15): +-# endif +- /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */ +- vmovq (%rdi), %xmm1 +- vmovq (%rsi), %xmm2 +- VPCMP $4, %xmm1, %xmm2, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_lv) +- /* Use overlapping loads to avoid branches. */ +- vmovq -8(%rdi, %rdx, CHAR_SIZE), %xmm1 +- vmovq -8(%rsi, %rdx, CHAR_SIZE), %xmm2 +- VPCMP $4, %xmm1, %xmm2, %k1 +- addl $(CHAR_PER_VEC - (8 / CHAR_SIZE)), %edx +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_end) +- ret +- +- .p2align 4,, 8 +-L(between_16_31): +- /* From 16 to 31 bytes. No branch when size == 16. */ +- +- /* Use movups to save code size. */ +- vmovdqu (%rsi), %xmm2 +- VPCMP $4, (%rdi), %xmm2, %k1 +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_lv) +- /* Use overlapping loads to avoid branches. */ +- vmovdqu -16(%rsi, %rdx, CHAR_SIZE), %xmm2 +- VPCMP $4, -16(%rdi, %rdx, CHAR_SIZE), %xmm2, %k1 +- addl $(CHAR_PER_VEC - (16 / CHAR_SIZE)), %edx +- kmovd %k1, %eax +- testl %eax, %eax +- jnz L(return_vec_0_end) +- ret +- +-# ifndef USE_AS_WMEMCMP +-L(between_2_3): +- /* Load as big endian to avoid branches. */ +- movzwl (%rdi), %eax +- movzwl (%rsi), %ecx +- shll $8, %eax +- shll $8, %ecx +- bswap %eax +- bswap %ecx +- movzbl -1(%rdi, %rdx), %edi +- movzbl -1(%rsi, %rdx), %esi +- orl %edi, %eax +- orl %esi, %ecx +- /* Subtraction is okay because the upper 8 bits are zero. */ +- subl %ecx, %eax +- ret +-# endif + END (MEMCMP) + #endif diff --git a/glibc-upstream-2.34-187.patch b/glibc-upstream-2.34-187.patch new file mode 100644 index 0000000..6186aeb --- /dev/null +++ b/glibc-upstream-2.34-187.patch @@ -0,0 +1,42 @@ +commit 9681691402052b727e01ae3375c73e0f76566593 +Author: Adhemerval Zanella +Date: Wed Apr 27 13:59:26 2022 -0300 + + linux: Fix missing internal 64 bit time_t stat usage + + These are two missing spots initially done by 52a5fe70a2c77935. + + Checked on i686-linux-gnu. + + (cherry picked from commit 834ddd0432f68d6dc85b6aac95065721af0d86e9) + +diff --git a/sysdeps/unix/sysv/linux/faccessat.c b/sysdeps/unix/sysv/linux/faccessat.c +index 13160d32499c4e58..00e4ce7f80ee2dfe 100644 +--- a/sysdeps/unix/sysv/linux/faccessat.c ++++ b/sysdeps/unix/sysv/linux/faccessat.c +@@ -39,8 +39,8 @@ __faccessat (int fd, const char *file, int mode, int flag) + if ((flag == 0 || ((flag & ~AT_EACCESS) == 0 && ! __libc_enable_secure))) + return INLINE_SYSCALL (faccessat, 3, fd, file, mode); + +- struct stat64 stats; +- if (__fstatat64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW)) ++ struct __stat64_t64 stats; ++ if (__fstatat64_time64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW)) + return -1; + + mode &= (X_OK | W_OK | R_OK); /* Clear any bogus bits. */ +diff --git a/sysdeps/unix/sysv/linux/pathconf.c b/sysdeps/unix/sysv/linux/pathconf.c +index b599a66c930cad4d..f79930303118ebcd 100644 +--- a/sysdeps/unix/sysv/linux/pathconf.c ++++ b/sysdeps/unix/sysv/linux/pathconf.c +@@ -110,8 +110,8 @@ distinguish_extX (const struct statfs *fsbuf, const char *file, int fd) + && strcmp (mntbuf.mnt_type, "ext4") != 0) + continue; + +- struct stat64 fsst; +- if (__stat64 (mntbuf.mnt_dir, &fsst) >= 0 ++ struct __stat64_t64 fsst; ++ if (__stat64_time64 (mntbuf.mnt_dir, &fsst) >= 0 + && st.st_dev == fsst.st_dev) + { + if (strcmp (mntbuf.mnt_type, "ext4") == 0) diff --git a/glibc-upstream-2.34-188.patch b/glibc-upstream-2.34-188.patch new file mode 100644 index 0000000..8b49369 --- /dev/null +++ b/glibc-upstream-2.34-188.patch @@ -0,0 +1,39 @@ +commit 55640ed3fde48360a8e8083be4843bd2dc7cecfe +Author: Carlos O'Donell +Date: Tue Apr 26 10:52:41 2022 -0400 + + i386: Regenerate ulps + + These failures were caught while building glibc master for Fedora + Rawhide which is built with '-mtune=generic -msse2 -mfpmath=sse' + using gcc 11.3 (gcc-11.3.1-2.fc35) on a Cascadelake Intel Xeon + processor. + + (cherry picked from commit e465d97653311c3687aee49de782177353acfe86) + +diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps +index 7601049110789201..84e6686eba5fe79a 100644 +--- a/sysdeps/i386/fpu/libm-test-ulps ++++ b/sysdeps/i386/fpu/libm-test-ulps +@@ -668,7 +668,7 @@ ldouble: 4 + + Function: Imaginary part of "clog10": + double: 2 +-float: 1 ++float: 2 + float128: 2 + ldouble: 2 + +diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +index a39c89cec1141935..cc21e6907fe8b6a3 100644 +--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps ++++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps +@@ -668,7 +668,7 @@ ldouble: 4 + + Function: Imaginary part of "clog10": + double: 2 +-float: 1 ++float: 2 + float128: 2 + ldouble: 2 + diff --git a/glibc-upstream-2.34-189.patch b/glibc-upstream-2.34-189.patch new file mode 100644 index 0000000..3a5889c --- /dev/null +++ b/glibc-upstream-2.34-189.patch @@ -0,0 +1,116 @@ +commit 88a8637cb4658cd91a002659db05867716b88b36 +Author: Adhemerval Zanella +Date: Wed Apr 27 13:40:30 2022 -0300 + + linux: Fix fchmodat with AT_SYMLINK_NOFOLLOW for 64 bit time_t (BZ#29097) + + The AT_SYMLINK_NOFOLLOW emulation ues the default 32 bit stat internal + calls, which fails with EOVERFLOW if the file constains timestamps + beyond 2038. + + Checked on i686-linux-gnu. + + (cherry picked from commit 118a2aee07f64d605b6668cbe195c1f44eac6be6) + +diff --git a/io/Makefile b/io/Makefile +index 9871ecbc74020a6d..01968b81042e01e4 100644 +--- a/io/Makefile ++++ b/io/Makefile +@@ -81,16 +81,17 @@ tests := test-utime test-stat test-stat2 test-lfs tst-getcwd \ + tst-closefrom \ + + tests-time64 := \ ++ tst-fcntl-time64 \ ++ tst-fts-time64 \ + tst-futimens-time64 \ + tst-futimes-time64\ +- tst-fts-time64 \ ++ tst-futimesat-time64 \ ++ tst-lchmod-time64 \ + tst-lutimes-time64 \ + tst-stat-time64 \ +- tst-futimesat-time64 \ + tst-utime-time64 \ + tst-utimensat-time64 \ + tst-utimes-time64 \ +- tst-fcntl-time64 \ + # tests-time64 + + # Likewise for statx, but we do not need static linking here. +@@ -134,6 +135,7 @@ CFLAGS-close.c += -fexceptions -fasynchronous-unwind-tables + + CFLAGS-test-stat.c += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE + CFLAGS-test-lfs.c += -D_LARGEFILE64_SOURCE ++CFLAGS-tst-lchmod.c += -D_FILE_OFFSET_BITS=64 + + test-stat2-ARGS = Makefile . $(objpfx)test-stat2 + +diff --git a/io/tst-lchmod-time64.c b/io/tst-lchmod-time64.c +new file mode 100644 +index 0000000000000000..f2b7cc9d358f2a77 +--- /dev/null ++++ b/io/tst-lchmod-time64.c +@@ -0,0 +1,2 @@ ++#define CHECK_TIME64 ++#include "tst-lchmod.c" +diff --git a/io/tst-lchmod.c b/io/tst-lchmod.c +index 0fe98e01b74b713d..472766b186975922 100644 +--- a/io/tst-lchmod.c ++++ b/io/tst-lchmod.c +@@ -66,10 +66,27 @@ select_path (bool do_relative_path, const char *full_path, const char *relative_ + return full_path; + } + ++static void ++update_file_time_to_y2038 (const char *fname, int flags) ++{ ++#ifdef CHECK_TIME64 ++ /* Y2038 threshold plus 1 second. */ ++ const struct timespec ts[] = { { 0x80000001LL, 0}, { 0x80000001LL } }; ++ TEST_VERIFY_EXIT (utimensat (AT_FDCWD, fname, ts, flags) == 0); ++#endif ++} ++ + static void + test_1 (bool do_relative_path, int (*chmod_func) (int fd, const char *, mode_t, int)) + { + char *tempdir = support_create_temp_directory ("tst-lchmod-"); ++#ifdef CHECK_TIME64 ++ if (!support_path_support_time64 (tempdir)) ++ { ++ puts ("info: test skipped, filesystem does not support 64 bit time_t"); ++ return; ++ } ++#endif + + char *path_dangling = xasprintf ("%s/dangling", tempdir); + char *path_file = xasprintf ("%s/file", tempdir); +@@ -93,9 +110,12 @@ test_1 (bool do_relative_path, int (*chmod_func) (int fd, const char *, mode_t, + xsymlink ("loop", path_loop); + xsymlink ("target-does-not-exist", path_dangling); + ++ update_file_time_to_y2038 (path_file, 0); ++ update_file_time_to_y2038 (path_to_file, AT_SYMLINK_NOFOLLOW); ++ + /* Check that the modes do not collide with what we will use in the + test. */ +- struct stat64 st; ++ struct stat st; + xstat (path_file, &st); + TEST_VERIFY ((st.st_mode & 0777) != 1); + xlstat (path_to_file, &st); +diff --git a/sysdeps/unix/sysv/linux/fchmodat.c b/sysdeps/unix/sysv/linux/fchmodat.c +index 5bd1eb96a5d78130..b0cf61949a9302d9 100644 +--- a/sysdeps/unix/sysv/linux/fchmodat.c ++++ b/sysdeps/unix/sysv/linux/fchmodat.c +@@ -48,8 +48,8 @@ fchmodat (int fd, const char *file, mode_t mode, int flag) + + /* Use fstatat because fstat does not work on O_PATH descriptors + before Linux 3.6. */ +- struct stat64 st; +- if (__fstatat64 (pathfd, "", &st, AT_EMPTY_PATH) != 0) ++ struct __stat64_t64 st; ++ if (__fstatat64_time64 (pathfd, "", &st, AT_EMPTY_PATH) != 0) + { + __close_nocancel (pathfd); + return -1; diff --git a/glibc-upstream-2.34-190.patch b/glibc-upstream-2.34-190.patch new file mode 100644 index 0000000..f21a4bf --- /dev/null +++ b/glibc-upstream-2.34-190.patch @@ -0,0 +1,189 @@ +commit c66c92181ddbd82306537a608e8c0282587131de +Author: DJ Delorie +Date: Wed Mar 30 17:44:02 2022 -0400 + + posix/glob.c: update from gnulib + + Copied from gnulib/lib/glob.c in order to fix rhbz 1982608 + Also fixes swbz 25659 + + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + (cherry picked from commit 7c477b57a31487eda516db02b9e04f22d1a6e6af) + +diff --git a/posix/glob.c b/posix/glob.c +index 593a4c358f3d42e5..6af310a1aa31401a 100644 +--- a/posix/glob.c ++++ b/posix/glob.c +@@ -21,13 +21,14 @@ + optimizes away the pattern == NULL test below. */ + # define _GL_ARG_NONNULL(params) + +-# include ++# include + + #endif + + #include + + #include ++#include + #include + #include + #include +@@ -56,6 +57,8 @@ + # define sysconf(id) __sysconf (id) + # define closedir(dir) __closedir (dir) + # define opendir(name) __opendir (name) ++# undef dirfd ++# define dirfd(str) __dirfd (str) + # define readdir(str) __readdir64 (str) + # define getpwnam_r(name, bufp, buf, len, res) \ + __getpwnam_r (name, bufp, buf, len, res) +@@ -69,11 +72,8 @@ + # ifndef GLOB_LSTAT + # define GLOB_LSTAT gl_lstat + # endif +-# ifndef GLOB_STAT64 +-# define GLOB_STAT64 __stat64 +-# endif +-# ifndef GLOB_LSTAT64 +-# define GLOB_LSTAT64 __lstat64 ++# ifndef GLOB_FSTATAT64 ++# define GLOB_FSTATAT64 __fstatat64 + # endif + # include + #else /* !_LIBC */ +@@ -88,8 +88,7 @@ + # define struct_stat struct stat + # define struct_stat64 struct stat + # define GLOB_LSTAT gl_lstat +-# define GLOB_STAT64 stat +-# define GLOB_LSTAT64 lstat ++# define GLOB_FSTATAT64 fstatat + #endif /* _LIBC */ + + #include +@@ -215,7 +214,8 @@ glob_lstat (glob_t *pglob, int flags, const char *fullname) + } ust; + return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC) + ? pglob->GLOB_LSTAT (fullname, &ust.st) +- : GLOB_LSTAT64 (fullname, &ust.st64)); ++ : GLOB_FSTATAT64 (AT_FDCWD, fullname, &ust.st64, ++ AT_SYMLINK_NOFOLLOW)); + } + + /* Set *R = A + B. Return true if the answer is mathematically +@@ -257,7 +257,8 @@ is_dir (char const *filename, int flags, glob_t const *pglob) + struct_stat64 st64; + return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC) + ? pglob->gl_stat (filename, &st) == 0 && S_ISDIR (st.st_mode) +- : GLOB_STAT64 (filename, &st64) == 0 && S_ISDIR (st64.st_mode)); ++ : (GLOB_FSTATAT64 (AT_FDCWD, filename, &st64, 0) == 0 ++ && S_ISDIR (st64.st_mode))); + } + + /* Find the end of the sub-pattern in a brace expression. */ +@@ -747,6 +748,8 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), + else + { + #ifndef WINDOWS32 ++ /* Recognize ~user as a shorthand for the specified user's home ++ directory. */ + char *end_name = strchr (dirname, '/'); + char *user_name; + int malloc_user_name = 0; +@@ -885,7 +888,22 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), + } + scratch_buffer_free (&pwtmpbuf); + } +-#endif /* !WINDOWS32 */ ++#else /* WINDOWS32 */ ++ /* On native Windows, access to a user's home directory ++ (via GetUserProfileDirectory) or to a user's environment ++ variables (via ExpandEnvironmentStringsForUser) requires ++ the credentials of the user. Therefore we cannot support ++ the ~user syntax on this platform. ++ Handling ~user specially (and treat it like plain ~) if ++ user is getenv ("USERNAME") would not be a good idea, ++ since it would make people think that ~user is supported ++ in general. */ ++ if (flags & GLOB_TILDE_CHECK) ++ { ++ retval = GLOB_NOMATCH; ++ goto out; ++ } ++#endif /* WINDOWS32 */ + } + } + +@@ -1266,6 +1284,8 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + { + size_t dirlen = strlen (directory); + void *stream = NULL; ++ struct scratch_buffer s; ++ scratch_buffer_init (&s); + # define GLOBNAMES_MEMBERS(nnames) \ + struct globnames *next; size_t count; char *name[nnames]; + struct globnames { GLOBNAMES_MEMBERS (FLEXIBLE_ARRAY_MEMBER) }; +@@ -1337,6 +1357,7 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + } + else + { ++ int dfd = dirfd (stream); + int fnm_flags = ((!(flags & GLOB_PERIOD) ? FNM_PERIOD : 0) + | ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)); + flags |= GLOB_MAGCHAR; +@@ -1364,8 +1385,32 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + if (flags & GLOB_ONLYDIR) + switch (readdir_result_type (d)) + { +- case DT_DIR: case DT_LNK: case DT_UNKNOWN: break; + default: continue; ++ case DT_DIR: break; ++ case DT_LNK: case DT_UNKNOWN: ++ /* The filesystem was too lazy to give us a hint, ++ so we have to do it the hard way. */ ++ if (__glibc_unlikely (dfd < 0 || flags & GLOB_ALTDIRFUNC)) ++ { ++ size_t namelen = strlen (d.name); ++ size_t need = dirlen + 1 + namelen + 1; ++ if (s.length < need ++ && !scratch_buffer_set_array_size (&s, need, 1)) ++ goto memory_error; ++ char *p = mempcpy (s.data, directory, dirlen); ++ *p = '/'; ++ p += p[-1] != '/'; ++ memcpy (p, d.name, namelen + 1); ++ if (! is_dir (s.data, flags, pglob)) ++ continue; ++ } ++ else ++ { ++ struct_stat64 st64; ++ if (! (GLOB_FSTATAT64 (dfd, d.name, &st64, 0) == 0 ++ && S_ISDIR (st64.st_mode))) ++ continue; ++ } + } + + if (fnmatch (pattern, d.name, fnm_flags) == 0) +@@ -1497,5 +1542,6 @@ glob_in_dir (const char *pattern, const char *directory, int flags, + __set_errno (save); + } + ++ scratch_buffer_free (&s); + return result; + } +diff --git a/sysdeps/unix/sysv/linux/glob64-time64.c b/sysdeps/unix/sysv/linux/glob64-time64.c +index a465f70905e5a8a3..95efe4c4f4624967 100644 +--- a/sysdeps/unix/sysv/linux/glob64-time64.c ++++ b/sysdeps/unix/sysv/linux/glob64-time64.c +@@ -37,6 +37,7 @@ + # define GLOB_LSTAT gl_lstat + # define GLOB_STAT64 __stat64_time64 + # define GLOB_LSTAT64 __lstat64_time64 ++# define GLOB_FSTATAT64 __fstatat64_time64 + + # define COMPILE_GLOB64 1 + diff --git a/glibc.spec b/glibc.spec index 1db258f..61f2ecc 100644 --- a/glibc.spec +++ b/glibc.spec @@ -148,7 +148,7 @@ end \ Summary: The GNU libc libraries Name: glibc Version: %{glibcversion} -Release: 29%{?dist} +Release: 32%{?dist} # In general, GPLv2+ is used by programs, LGPLv2+ is used for # libraries. @@ -379,17 +379,17 @@ Patch175: glibc-rh2058224-2.patch Patch176: glibc-rh2058230.patch Patch177: glibc-rh2054789.patch Patch178: glibc-upstream-2.34-108.patch -Patch179: glibc-upstream-2.34-110.patch # glibc-2.34-109-gd64b08d5ba only changes NEWS. +Patch179: glibc-upstream-2.34-110.patch Patch180: glibc-upstream-2.34-111.patch Patch181: glibc-upstream-2.34-112.patch Patch182: glibc-upstream-2.34-113.patch Patch183: glibc-upstream-2.34-114.patch +# glibc-2.34-115-gd5d1c95aaf only changes NEWS. +# glibc-2.34-116-g852361b5a3 is glibc-rh2054789.patch. Patch184: glibc-upstream-2.34-117.patch Patch185: glibc-upstream-2.34-118.patch Patch186: glibc-upstream-2.34-119.patch -# glibc-2.34-115-gd5d1c95aaf only changes NEWS. -# glibc-2.34-116-g852361b5a3 is glibc-rh2054789.patch. Patch187: glibc-upstream-2.34-120.patch Patch188: glibc-upstream-2.34-121.patch Patch189: glibc-upstream-2.34-122.patch @@ -403,6 +403,64 @@ Patch196: glibc-upstream-2.34-129.patch Patch197: glibc-upstream-2.34-130.patch Patch198: glibc-upstream-2.34-131.patch Patch199: glibc-upstream-2.34-132.patch +Patch200: glibc-upstream-2.34-133.patch +Patch201: glibc-upstream-2.34-134.patch +Patch202: glibc-upstream-2.34-135.patch +Patch203: glibc-upstream-2.34-136.patch +Patch204: glibc-upstream-2.34-137.patch +Patch205: glibc-upstream-2.34-138.patch +Patch206: glibc-upstream-2.34-139.patch +Patch207: glibc-upstream-2.34-140.patch +Patch208: glibc-upstream-2.34-141.patch +Patch209: glibc-upstream-2.34-142.patch +Patch210: glibc-upstream-2.34-143.patch +Patch211: glibc-upstream-2.34-144.patch +Patch212: glibc-upstream-2.34-145.patch +Patch213: glibc-upstream-2.34-146.patch +Patch214: glibc-upstream-2.34-147.patch +Patch215: glibc-upstream-2.34-148.patch +Patch216: glibc-upstream-2.34-149.patch +Patch217: glibc-upstream-2.34-150.patch +Patch218: glibc-upstream-2.34-151.patch +Patch219: glibc-upstream-2.34-152.patch +Patch220: glibc-upstream-2.34-153.patch +Patch221: glibc-upstream-2.34-154.patch +Patch222: glibc-upstream-2.34-155.patch +Patch223: glibc-upstream-2.34-156.patch +Patch224: glibc-upstream-2.34-157.patch +Patch225: glibc-upstream-2.34-158.patch +Patch226: glibc-upstream-2.34-159.patch +Patch227: glibc-upstream-2.34-160.patch +# glibc-2.34-161-gceed89d089 only changes NEWS. +Patch228: glibc-upstream-2.34-162.patch +Patch229: glibc-upstream-2.34-163.patch +Patch230: glibc-upstream-2.34-164.patch +Patch231: glibc-upstream-2.34-165.patch +Patch232: glibc-upstream-2.34-166.patch +Patch233: glibc-upstream-2.34-167.patch +Patch234: glibc-upstream-2.34-168.patch +Patch235: glibc-upstream-2.34-169.patch +Patch236: glibc-upstream-2.34-170.patch +Patch237: glibc-upstream-2.34-171.patch +Patch238: glibc-upstream-2.34-172.patch +Patch239: glibc-upstream-2.34-173.patch +Patch240: glibc-upstream-2.34-174.patch +Patch241: glibc-upstream-2.34-175.patch +Patch242: glibc-upstream-2.34-176.patch +Patch243: glibc-upstream-2.34-177.patch +Patch244: glibc-upstream-2.34-178.patch +Patch245: glibc-upstream-2.34-179.patch +Patch246: glibc-upstream-2.34-180.patch +Patch247: glibc-upstream-2.34-181.patch +Patch248: glibc-upstream-2.34-182.patch +Patch249: glibc-upstream-2.34-183.patch +Patch250: glibc-upstream-2.34-184.patch +Patch251: glibc-upstream-2.34-185.patch +Patch252: glibc-upstream-2.34-186.patch +Patch253: glibc-upstream-2.34-187.patch +Patch254: glibc-upstream-2.34-188.patch +Patch255: glibc-upstream-2.34-189.patch +Patch256: glibc-upstream-2.34-190.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2459,6 +2517,76 @@ fi %files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared %changelog +* Thu Apr 28 2022 Carlos O'Donell - 2.34-32 +- Sync with upstream branch release/2.34/master, + commit c66c92181ddbd82306537a608e8c0282587131de: +- posix/glob.c: update from gnulib (BZ#25659) +- linux: Fix fchmodat with AT_SYMLINK_NOFOLLOW for 64 bit time_t (BZ#29097) + +* Wed Apr 27 2022 Carlos O'Donell - 2.34-31 +- Sync with upstream branch release/2.34/master, + commit 55640ed3fde48360a8e8083be4843bd2dc7cecfe: +- i386: Regenerate ulps +- linux: Fix missing internal 64 bit time_t stat usage +- x86: Optimize L(less_vec) case in memcmp-evex-movbe.S +- x86: Don't set Prefer_No_AVX512 for processors with AVX512 and AVX-VNNI +- x86-64: Use notl in EVEX strcmp [BZ #28646] +- x86: Shrink memcmp-sse4.S code size +- x86: Double size of ERMS rep_movsb_threshold in dl-cacheinfo.h +- x86: Optimize memmove-vec-unaligned-erms.S +- x86-64: Replace movzx with movzbl +- x86-64: Remove Prefer_AVX2_STRCMP +- x86-64: Improve EVEX strcmp with masked load +- x86: Replace sse2 instructions with avx in memcmp-evex-movbe.S +- x86: Optimize memset-vec-unaligned-erms.S +- x86: Optimize memcmp-evex-movbe.S for frontend behavior and size +- x86: Modify ENTRY in sysdep.h so that p2align can be specified +- x86-64: Optimize load of all bits set into ZMM register [BZ #28252] +- scripts/glibcelf.py: Mark as UNSUPPORTED on Python 3.5 and earlier +- dlfcn: Do not use rtld_active () to determine ld.so state (bug 29078) +- INSTALL: Rephrase -with-default-link documentation +- misc: Fix rare fortify crash on wchar funcs. [BZ 29030] +- Default to --with-default-link=no (bug 25812) +- scripts: Add glibcelf.py module + +* Thu Apr 21 2022 Carlos O'Donell - 2.34-30 +- Sync with upstream branch release/2.34/master, + commit 71326f1f2fd09dafb9c34404765fb88129e94237: +- nptl: Fix pthread_cancel cancelhandling atomic operations +- mips: Fix mips64n32 64 bit time_t stat support (BZ#29069) +- hurd: Fix arbitrary error code +- nptl: Handle spurious EINTR when thread cancellation is disabled (BZ#29029) +- S390: Add new s390 platform z16. +- NEWS: Update fixed bug list for LD_AUDIT backports. +- hppa: Fix bind-now audit (BZ #28857) +- elf: Replace tst-audit24bmod2.so with tst-audit24bmod2 +- Fix elf/tst-audit25a with default bind now toolchains +- elf: Fix runtime linker auditing on aarch64 (BZ #26643) +- elf: Issue la_symbind for bind-now (BZ #23734) +- elf: Fix initial-exec TLS access on audit modules (BZ #28096) +- elf: Add la_activity during application exit +- elf: Do not fail for failed dlmopen on audit modules (BZ #28061) +- elf: Issue audit la_objopen for vDSO +- elf: Add audit tests for modules with TLSDESC +- elf: Avoid unnecessary slowdown from profiling with audit (BZ#15533) +- elf: Add _dl_audit_pltexit +- elf: Add _dl_audit_pltenter +- elf: Add _dl_audit_preinit +- elf: Add _dl_audit_symbind_alt and _dl_audit_symbind +- elf: Add _dl_audit_objclose +- elf: Add _dl_audit_objsearch +- elf: Add _dl_audit_activity_map and _dl_audit_activity_nsid +- elf: Add _dl_audit_objopen +- elf: Move la_activity (LA_ACT_ADD) after _dl_add_to_namespace_list() (BZ #28062) +- elf: Move LAV_CURRENT to link_lavcurrent.h +- elf: Fix elf_get_dynamic_info() for bootstrap +- elf: Fix dynamic-link.h usage on rtld.c +- elf: Fix elf_get_dynamic_info definition +- elf: Avoid nested functions in the loader [BZ #27220] +- powerpc: Delete unneeded ELF_MACHINE_BEFORE_RTLD_RELOC +- hppa: Use END instead of PSEUDO_END in swapcontext.S +- hppa: Implement swapcontext in assembler (bug 28960) + * Tue Mar 15 2022 Florian Weimer - 2.34-29 - Sync with upstream branch release/2.34/master, commit 224d8c1890b6c57c7e4e8ddbb792dd9552086704: