import glibc-2.28-221.el8

This commit is contained in:
CentOS Sources 2022-11-26 10:10:28 +00:00 committed by Stepan Oksanichenko
parent cc1cae06b4
commit d7b7c4067c
13 changed files with 3571 additions and 1 deletions

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,79 @@
commit dbb75513f5cf9285c77c9e55777c5c35b653f890
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Sep 6 07:38:10 2022 +0200
elf: Rename _dl_sort_maps parameter from skip to force_first
The new implementation will not be able to skip an arbitrary number
of objects.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index 99354dc08a010dd3..7a586749adc3fa7d 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -27,12 +27,12 @@
If FOR_FINI is true, this is called for finishing an object. */
static void
_dl_sort_maps_original (struct link_map **maps, unsigned int nmaps,
- unsigned int skip, bool for_fini)
+ bool force_first, bool for_fini)
{
/* Allows caller to do the common optimization of skipping the first map,
usually the main binary. */
- maps += skip;
- nmaps -= skip;
+ maps += force_first;
+ nmaps -= force_first;
/* A list of one element need not be sorted. */
if (nmaps <= 1)
@@ -182,7 +182,7 @@ dfs_traversal (struct link_map ***rpo, struct link_map *map,
static void
_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
- unsigned int skip __attribute__ ((unused)), bool for_fini)
+ bool force_first __attribute__ ((unused)), bool for_fini)
{
for (int i = nmaps - 1; i >= 0; i--)
maps[i]->l_visited = 0;
@@ -286,7 +286,7 @@ _dl_sort_maps_init (void)
void
_dl_sort_maps (struct link_map **maps, unsigned int nmaps,
- unsigned int skip, bool for_fini)
+ bool force_first, bool for_fini)
{
/* It can be tempting to use a static function pointer to store and call
the current selected sorting algorithm routine, but experimentation
@@ -296,9 +296,9 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps,
input cases. A simple if-case with direct function calls appears to
be the fastest. */
if (__glibc_likely (GLRO(dl_dso_sort_algo) == dso_sort_algorithm_original))
- _dl_sort_maps_original (maps, nmaps, skip, for_fini);
+ _dl_sort_maps_original (maps, nmaps, force_first, for_fini);
else
- _dl_sort_maps_dfs (maps, nmaps, skip, for_fini);
+ _dl_sort_maps_dfs (maps, nmaps, force_first, for_fini);
}
#endif /* HAVE_TUNABLES. */
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 9f09a4a280396659..2c1b4c47c6a6c643 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1056,9 +1056,11 @@ extern void _dl_init (struct link_map *main_map, int argc, char **argv,
initializer functions have completed. */
extern void _dl_fini (void) attribute_hidden;
-/* Sort array MAPS according to dependencies of the contained objects. */
+/* Sort array MAPS according to dependencies of the contained objects.
+ If FORCE_FIRST, MAPS[0] keeps its place even if the dependencies
+ say otherwise. */
extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps,
- unsigned int skip, bool for_fini) attribute_hidden;
+ bool force_first, bool for_fini) attribute_hidden;
/* The dynamic linker calls this function before and having changing
any shared object mappings. The `r_state' member of `struct r_debug'

View File

@ -0,0 +1,90 @@
commit 1df71d32fe5f5905ffd5d100e5e9ca8ad6210891
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Sep 20 11:00:42 2022 +0200
elf: Implement force_first handling in _dl_sort_maps_dfs (bug 28937)
The implementation in _dl_close_worker requires that the first
element of l_initfini is always this very map (“We are always the
zeroth entry, and since we don't include ourselves in the
dependency analysis start at 1.”). Rather than fixing that
assumption, this commit adds an implementation of the force_first
argument to the new dependency sorting algorithm. This also means
that the directly dlopen'ed shared object is always initialized last,
which is the least surprising behavior in the presence of cycles.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index 7a586749adc3fa7d..6f5c17b47b98fbc7 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -182,8 +182,9 @@ dfs_traversal (struct link_map ***rpo, struct link_map *map,
static void
_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
- bool force_first __attribute__ ((unused)), bool for_fini)
+ bool force_first, bool for_fini)
{
+ struct link_map *first_map = maps[0];
for (int i = nmaps - 1; i >= 0; i--)
maps[i]->l_visited = 0;
@@ -208,14 +209,6 @@ _dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
Adjusting the order so that maps[0] is last traversed naturally avoids
this problem.
- Further, the old "optimization" of skipping the main object at maps[0]
- from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general
- no longer valid, since traversing along object dependency-links
- may "find" the main object even when it is not included in the initial
- order (e.g. a dlopen()'ed shared object can have circular dependencies
- linked back to itself). In such a case, traversing N-1 objects will
- create a N-object result, and raise problems.
-
To summarize, just passing in the full list, and iterating from back
to front makes things much more straightforward. */
@@ -274,6 +267,27 @@ _dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
}
memcpy (maps, rpo, sizeof (struct link_map *) * nmaps);
+
+ /* Skipping the first object at maps[0] is not valid in general,
+ since traversing along object dependency-links may "find" that
+ first object even when it is not included in the initial order
+ (e.g., a dlopen'ed shared object can have circular dependencies
+ linked back to itself). In such a case, traversing N-1 objects
+ will create a N-object result, and raise problems. Instead,
+ force the object back into first place after sorting. This naive
+ approach may introduce further dependency ordering violations
+ compared to rotating the cycle until the first map is again in
+ the first position, but as there is a cycle, at least one
+ violation is already present. */
+ if (force_first && maps[0] != first_map)
+ {
+ int i;
+ for (i = 0; maps[i] != first_map; ++i)
+ ;
+ assert (i < nmaps);
+ memmove (&maps[1], maps, i * sizeof (maps[0]));
+ maps[0] = first_map;
+ }
}
void
diff --git a/elf/dso-sort-tests-1.def b/elf/dso-sort-tests-1.def
index 5f7f18ef270bc12d..4bf9052db16fb352 100644
--- a/elf/dso-sort-tests-1.def
+++ b/elf/dso-sort-tests-1.def
@@ -64,3 +64,10 @@ output: b>a>{}<a<b
tst-bz15311: {+a;+e;+f;+g;+d;%d;-d;-g;-f;-e;-a};a->b->c->d;d=>[ba];c=>a;b=>e=>a;c=>f=>b;d=>g=>c
output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<c<d<g<f<b<e];}
output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<g<f<a<b<c<d<e];}
+
+# Test that even in the presence of dependency loops involving dlopen'ed
+# object, that object is initialized last (and not unloaded prematurely).
+# Final destructor order is indeterminate due to the cycle.
+tst-bz28937: {+a;+b;-b;+c;%c};a->a1;a->a2;a2->a;b->b1;c->a1;c=>a1
+output(glibc.rtld.dynamic_sort=1): {+a[a2>a1>a>];+b[b1>b>];-b[<b<b1];+c[c>];%c(a1());}<a<a2<c<a1
+output(glibc.rtld.dynamic_sort=2): {+a[a2>a1>a>];+b[b1>b>];-b[<b<b1];+c[c>];%c(a1());}<a2<a<c<a1

View File

@ -0,0 +1,35 @@
Downstream-specific patch to link DSO sorting tests with -ldl
if needed. Upstream does not need this because <dlfcn.h> interfaces
are part of libc.
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
index 43b5ec4d920ad6a3..ae85e0f4a6ae5b3e 100644
--- a/scripts/dso-ordering-test.py
+++ b/scripts/dso-ordering-test.py
@@ -657,6 +657,8 @@ def process_testcase(t):
% (test_name + "-" + dep + ".FAKE.so",
("$(objpfx)" + test_subdir + "/"
+ test_name + "-" + dep + ".so")))
+ makefile.write(
+ "LDLIBS-%s += -Wl,--as-needed -ldl -Wl,--no-as-needed\n" % dso)
rule = ("$(objpfx)" + test_subdir + "/"
+ test_name + "-" + dep + ".FAKE.os: "
"$(objpfx)" + test_srcdir
@@ -685,6 +687,8 @@ def process_testcase(t):
+ test_descr.soname_map[o] + ".so")
ldflags += (" -Wl,-soname=" + soname)
makefile.write("LDFLAGS-%s = %s\n" % (dso, ldflags))
+ makefile.write(
+ "LDLIBS-%s += -Wl,--as-needed -ldl -Wl,--no-as-needed\n" % dso)
if o in test_descr.callrefs:
makefile.write("%s-no-z-defs = yes\n" % (dso))
@@ -702,6 +706,8 @@ def process_testcase(t):
+ test_descr.soname_map['#'] + ".so")
ldflags += (" -Wl,-soname=" + soname)
makefile.write("LDFLAGS-%s = %s\n" % (test_name, ldflags))
+ makefile.write(
+ "LDLIBS-%s += -Wl,--as-needed -ldl -Wl,--no-as-needed\n" % test_name)
rule = ("$(objpfx)" + test_subdir + "/" + test_name + ".o: "
"$(objpfx)" + test_srcdir + test_name + ".c\n"
"\t$(compile.c) $(OUTPUT_OPTION)\n")

View File

@ -0,0 +1,189 @@
commit b4bbedb1e75737a80bcc3d53d6eef1fbe0b5f4d5
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sat Nov 6 14:13:27 2021 -0700
dso-ordering-test.py: Put all sources in one directory [BZ #28550]
Put all sources for DSO sorting tests in the dso-sort-tests-src directory
and compile test relocatable objects with
$(objpfx)tst-dso-ordering1-dir/tst-dso-ordering1-a.os: $(objpfx)dso-sort-tests-src/tst-dso-ordering1-a.c
$(compile.c) $(OUTPUT_OPTION)
to avoid random $< values from $(before-compile) when compiling test
relocatable objects with
$(objpfx)%$o: $(objpfx)%.c $(before-compile); $$(compile-command.c)
compile-command.c = $(compile.c) $(OUTPUT_OPTION) $(compile-mkdep-flags)
compile.c = $(CC) $< -c $(CFLAGS) $(CPPFLAGS)
for 3 "make -j 28" parallel builds on a machine with 112 cores at the
same time.
This partially fixes BZ #28550.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
index 944ee740527d60fd..bde0406be9da14fc 100644
--- a/scripts/dso-ordering-test.py
+++ b/scripts/dso-ordering-test.py
@@ -526,9 +526,13 @@ def process_testcase(t):
base_test_name = t.test_name
test_subdir = base_test_name + "-dir"
testpfx = objpfx + test_subdir + "/"
+ test_srcdir = "dso-sort-tests-src/"
+ testpfx_src = objpfx + test_srcdir
if not os.path.exists(testpfx):
os.mkdir(testpfx)
+ if not os.path.exists(testpfx_src):
+ os.mkdir(testpfx_src)
def find_objs_not_depended_on(t):
objs_not_depended_on = []
@@ -595,6 +599,11 @@ def process_testcase(t):
# Print out needed Makefile fragments for use in glibc/elf/Makefile.
module_names = ""
for o in test_descr.objs:
+ rule = ("$(objpfx)" + test_subdir + "/" + test_name
+ + "-" + o + ".os: $(objpfx)" + test_srcdir
+ + test_name + "-" + o + ".c\n"
+ "\t$(compile.c) $(OUTPUT_OPTION)\n")
+ makefile.write (rule)
module_names += " " + test_subdir + "/" + test_name + "-" + o
makefile.write("modules-names +=%s\n" % (module_names))
@@ -637,7 +646,7 @@ def process_testcase(t):
# object. This only needs to be done at most once for
# an object name.
if not dep in fake_created:
- f = open(testpfx + test_name + "-" + dep
+ f = open(testpfx_src + test_name + "-" + dep
+ ".FAKE.c", "w")
f.write(" \n")
f.close()
@@ -648,6 +657,12 @@ def process_testcase(t):
% (test_name + "-" + dep + ".FAKE.so",
("$(objpfx)" + test_subdir + "/"
+ test_name + "-" + dep + ".so")))
+ rule = ("$(objpfx)" + test_subdir + "/"
+ + test_name + "-" + dep + ".FAKE.os: "
+ "$(objpfx)" + test_srcdir
+ + test_name + "-" + dep + ".FAKE.c\n"
+ "\t$(compile.c) $(OUTPUT_OPTION)\n")
+ makefile.write (rule)
makefile.write \
("modules-names += %s\n"
% (test_subdir + "/"
@@ -687,6 +702,10 @@ def process_testcase(t):
+ test_descr.soname_map['#'] + ".so")
ldflags += (" -Wl,-soname=" + soname)
makefile.write("LDFLAGS-%s = %s\n" % (test_name, ldflags))
+ rule = ("$(objpfx)" + test_subdir + "/" + test_name + ".o: "
+ "$(objpfx)" + test_srcdir + test_name + ".c\n"
+ "\t$(compile.c) $(OUTPUT_OPTION)\n")
+ makefile.write (rule)
not_depended_objs = find_objs_not_depended_on(test_descr)
if not_depended_objs:
@@ -745,7 +764,7 @@ def process_testcase(t):
" something_failed=true\n"
"else\n"
" diff -wu ${common_objpfx}elf/%s/%s%s.output \\\n"
- " ${common_objpfx}elf/%s/%s%s.exp\n"
+ " ${common_objpfx}elf/%s%s%s.exp\n"
" if [ $? -ne 0 ]; then\n"
" echo '%sFAIL: %s%s expected output comparison'\n"
" something_failed=true\n"
@@ -753,14 +772,14 @@ def process_testcase(t):
"fi\n"
% (("X" if xfail else ""), test_name, tunable_descr,
test_subdir, test_name, tunable_sfx,
- test_subdir, base_test_name, exp_tunable_sfx,
+ test_srcdir, base_test_name, exp_tunable_sfx,
("X" if xfail else ""), test_name, tunable_descr))
# Generate C files according to dependency and calling relations from
# description string.
for obj in test_descr.objs:
src_name = test_name + "-" + obj + ".c"
- f = open(testpfx + src_name, "w")
+ f = open(testpfx_src + src_name, "w")
if obj in test_descr.callrefs:
called_objs = test_descr.callrefs[obj]
for callee in called_objs:
@@ -804,7 +823,7 @@ def process_testcase(t):
f.close()
# Open C file for writing main program
- f = open(testpfx + test_name + ".c", "w")
+ f = open(testpfx_src + test_name + ".c", "w")
# if there are some operations in main(), it means we need -ldl
f.write("#include <stdio.h>\n")
@@ -885,7 +904,7 @@ def process_testcase(t):
for obj in test_descr.objs:
src_name = test_name + "-" + obj + ".c"
obj_name = test_name + "-" + obj + ".os"
- run_cmd([build_gcc, "-c", "-fPIC", testpfx + src_name,
+ run_cmd([build_gcc, "-c", "-fPIC", testpfx_src + src_name,
"-o", testpfx + obj_name])
obj_processed = {}
@@ -903,10 +922,12 @@ def process_testcase(t):
deps.append(dep + ".FAKE")
if not dep in fake_created:
base_name = testpfx + test_name + "-" + dep
+ src_base_name = (testpfx_src + test_name
+ + "-" + dep)
cmd = [build_gcc, "-Wl,--no-as-needed",
("-Wl,-soname=" + base_name + ".so"),
"-shared", base_name + ".FAKE.c",
- "-o", base_name + ".FAKE.so"]
+ "-o", src_base_name + ".FAKE.so"]
run_cmd(cmd)
fake_created[dep] = True
dso_deps = map(lambda d: testpfx + test_name + "-" + d + ".so",
@@ -932,7 +953,7 @@ def process_testcase(t):
main_deps = map(lambda d: testpfx + test_name + "-" + d + ".so",
deps)
cmd = [build_gcc, "-Wl,--no-as-needed", "-o", testpfx + test_name,
- testpfx + test_name + ".c", "-L%s" % (os.getcwd()),
+ testpfx_src + test_name + ".c", "-L%s" % (os.getcwd()),
"-Wl,-rpath-link=%s" % (os.getcwd())]
if '#' in test_descr.soname_map:
soname = ("-Wl,-soname=" + testpfx + test_name + "-"
@@ -987,14 +1008,14 @@ def process_testcase(t):
sfx = ""
if r[0] != "":
sfx = "-" + r[0].replace("=","_")
- f = open(testpfx + t.test_name + sfx + ".exp", "w")
+ f = open(testpfx_src + t.test_name + sfx + ".exp", "w")
(output, xfail) = r[1]
f.write('%s' % output)
f.close()
# Create header part of top-level testcase shell script, to wrap execution
# and output comparison together.
- t.sh = open(testpfx + t.test_name + ".sh", "w")
+ t.sh = open(testpfx_src + t.test_name + ".sh", "w")
t.sh.write("#!/bin/sh\n")
t.sh.write("# Test driver for %s, generated by "
"dso-ordering-test.py\n" % (t.test_name))
@@ -1022,12 +1043,12 @@ def process_testcase(t):
sfx = ""
if r[0] != "":
sfx = "-" + r[0].replace("=","_")
- expected_output_files += " $(objpfx)%s/%s%s.exp" % (test_subdir,
+ expected_output_files += " $(objpfx)%s%s%s.exp" % (test_srcdir,
t.test_name, sfx)
makefile.write \
- ("$(objpfx)%s.out: $(objpfx)%s/%s.sh%s "
+ ("$(objpfx)%s.out: $(objpfx)%s%s.sh%s "
"$(common-objpfx)support/test-run-command\n"
- % (t.test_name, test_subdir, t.test_name,
+ % (t.test_name, test_srcdir, t.test_name,
expected_output_files))
makefile.write("\t$(SHELL) $< $(common-objpfx) '$(test-wrapper-env)' "
"'$(run-program-env)' > $@; $(evaluate-test)\n")

View File

@ -0,0 +1,589 @@
commit 15a0c5730d1d5aeb95f50c9ec7470640084feae8
Author: Chung-Lin Tang <cltang@codesourcery.com>
Date: Thu Oct 21 21:41:22 2021 +0800
elf: Fix slow DSO sorting behavior in dynamic loader (BZ #17645)
This second patch contains the actual implementation of a new sorting algorithm
for shared objects in the dynamic loader, which solves the slow behavior that
the current "old" algorithm falls into when the DSO set contains circular
dependencies.
The new algorithm implemented here is simply depth-first search (DFS) to obtain
the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1
bitfield is added to struct link_map to more elegantly facilitate such a search.
The DFS algorithm is applied to the input maps[nmap-1] backwards towards
maps[0]. This has the effect of a more "shallow" recursion depth in general
since the input is in BFS. Also, when combined with the natural order of
processing l_initfini[] at each node, this creates a resulting output sorting
closer to the intuitive "left-to-right" order in most cases.
Another notable implementation adjustment related to this _dl_sort_maps change
is the removing of two char arrays 'used' and 'done' in _dl_close_worker to
represent two per-map attributes. This has been changed to simply use two new
bit-fields l_map_used:1, l_map_done:1 added to struct link_map. This also allows
discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes
do along the way.
Tunable support for switching between different sorting algorithms at runtime is
also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1
(old algorithm) and 2 (new DFS algorithm) has been added. At time of commit
of this patch, the default setting is 1 (old algorithm).
Signed-off-by: Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/dl-tunables.list
(No mem.tagging tunable downstream.)
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 74ca9a85dd309780..22225efb3226c3e1 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -167,8 +167,6 @@ _dl_close_worker (struct link_map *map, bool force)
bool any_tls = false;
const unsigned int nloaded = ns->_ns_nloaded;
- char used[nloaded];
- char done[nloaded];
struct link_map *maps[nloaded];
/* Run over the list and assign indexes to the link maps and enter
@@ -176,24 +174,21 @@ _dl_close_worker (struct link_map *map, bool force)
int idx = 0;
for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next)
{
+ l->l_map_used = 0;
+ l->l_map_done = 0;
l->l_idx = idx;
maps[idx] = l;
++idx;
-
}
assert (idx == nloaded);
- /* Prepare the bitmaps. */
- memset (used, '\0', sizeof (used));
- memset (done, '\0', sizeof (done));
-
/* Keep track of the lowest index link map we have covered already. */
int done_index = -1;
while (++done_index < nloaded)
{
struct link_map *l = maps[done_index];
- if (done[done_index])
+ if (l->l_map_done)
/* Already handled. */
continue;
@@ -204,12 +199,12 @@ _dl_close_worker (struct link_map *map, bool force)
/* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why
acquire is sufficient and correct. */
&& atomic_load_acquire (&l->l_tls_dtor_count) == 0
- && !used[done_index])
+ && !l->l_map_used)
continue;
/* We need this object and we handle it now. */
- done[done_index] = 1;
- used[done_index] = 1;
+ l->l_map_used = 1;
+ l->l_map_done = 1;
/* Signal the object is still needed. */
l->l_idx = IDX_STILL_USED;
@@ -225,9 +220,9 @@ _dl_close_worker (struct link_map *map, bool force)
{
assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded);
- if (!used[(*lp)->l_idx])
+ if (!(*lp)->l_map_used)
{
- used[(*lp)->l_idx] = 1;
+ (*lp)->l_map_used = 1;
/* If we marked a new object as used, and we've
already processed it, then we need to go back
and process again from that point forward to
@@ -250,9 +245,9 @@ _dl_close_worker (struct link_map *map, bool force)
{
assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded);
- if (!used[jmap->l_idx])
+ if (!jmap->l_map_used)
{
- used[jmap->l_idx] = 1;
+ jmap->l_map_used = 1;
if (jmap->l_idx - 1 < done_index)
done_index = jmap->l_idx - 1;
}
@@ -262,8 +257,7 @@ _dl_close_worker (struct link_map *map, bool force)
/* Sort the entries. We can skip looking for the binary itself which is
at the front of the search list for the main namespace. */
- _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE),
- used + (nsid == LM_ID_BASE), true);
+ _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true);
/* Call all termination functions at once. */
bool unload_any = false;
@@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force)
/* All elements must be in the same namespace. */
assert (imap->l_ns == nsid);
- if (!used[i])
+ if (!imap->l_map_used)
{
assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);
@@ -315,7 +309,7 @@ _dl_close_worker (struct link_map *map, bool force)
if (i < first_loaded)
first_loaded = i;
}
- /* Else used[i]. */
+ /* Else imap->l_map_used. */
else if (imap->l_type == lt_loaded)
{
struct r_scope_elem *new_list = NULL;
@@ -524,7 +518,7 @@ _dl_close_worker (struct link_map *map, bool force)
for (unsigned int i = first_loaded; i < nloaded; ++i)
{
struct link_map *imap = maps[i];
- if (!used[i])
+ if (!imap->l_map_used)
{
assert (imap->l_type == lt_loaded);
diff --git a/elf/dl-deps.c b/elf/dl-deps.c
index 007069f670eced95..9365d54c8e03e5f4 100644
--- a/elf/dl-deps.c
+++ b/elf/dl-deps.c
@@ -612,10 +612,9 @@ Filters not supported with LD_TRACE_PRELINKING"));
/* If libc.so.6 is the main map, it participates in the sort, so
that the relocation order is correct regarding libc.so.6. */
- if (l_initfini[0] == GL (dl_ns)[l_initfini[0]->l_ns].libc_map)
- _dl_sort_maps (l_initfini, nlist, NULL, false);
- else
- _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false);
+ _dl_sort_maps (l_initfini, nlist,
+ (l_initfini[0] != GL (dl_ns)[l_initfini[0]->l_ns].libc_map),
+ false);
/* Terminate the list of dependencies. */
l_initfini[nlist] = NULL;
diff --git a/elf/dl-fini.c b/elf/dl-fini.c
index eea9d8aad736a99e..e14259a3c8806e0d 100644
--- a/elf/dl-fini.c
+++ b/elf/dl-fini.c
@@ -95,8 +95,7 @@ _dl_fini (void)
/* Now we have to do the sorting. We can skip looking for the
binary itself which is at the front of the search list for
the main namespace. */
- _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE),
- NULL, true);
+ _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true);
/* We do not rely on the linked list of loaded object anymore
from this point on. We have our own list here (maps). The
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index b2a01ede627be1e9..398a08f28c4d9ff1 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -16,16 +16,24 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <assert.h>
#include <ldsodefs.h>
+#include <elf/dl-tunables.h>
+/* Note: this is the older, "original" sorting algorithm, being used as
+ default up to 2.35.
-/* Sort array MAPS according to dependencies of the contained objects.
- Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is
- called for finishing an object. */
-void
-_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
- bool for_fini)
+ Sort array MAPS according to dependencies of the contained objects.
+ If FOR_FINI is true, this is called for finishing an object. */
+static void
+_dl_sort_maps_original (struct link_map **maps, unsigned int nmaps,
+ unsigned int skip, bool for_fini)
{
+ /* Allows caller to do the common optimization of skipping the first map,
+ usually the main binary. */
+ maps += skip;
+ nmaps -= skip;
+
/* A list of one element need not be sorted. */
if (nmaps <= 1)
return;
@@ -66,14 +74,6 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
(k - i) * sizeof (maps[0]));
maps[k] = thisp;
- if (used != NULL)
- {
- char here_used = used[i];
- memmove (&used[i], &used[i + 1],
- (k - i) * sizeof (used[0]));
- used[k] = here_used;
- }
-
if (seen[i + 1] > nmaps - i)
{
++i;
@@ -120,3 +120,183 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
next:;
}
}
+
+#if !HAVE_TUNABLES
+/* In this case, just default to the original algorithm. */
+strong_alias (_dl_sort_maps_original, _dl_sort_maps);
+#else
+
+/* We use a recursive function due to its better clarity and ease of
+ implementation, as well as faster execution speed. We already use
+ alloca() for list allocation during the breadth-first search of
+ dependencies in _dl_map_object_deps(), and this should be on the
+ same order of worst-case stack usage.
+
+ Note: the '*rpo' parameter is supposed to point to one past the
+ last element of the array where we save the sort results, and is
+ decremented before storing the current map at each level. */
+
+static void
+dfs_traversal (struct link_map ***rpo, struct link_map *map,
+ bool *do_reldeps)
+{
+ if (map->l_visited)
+ return;
+
+ map->l_visited = 1;
+
+ if (map->l_initfini)
+ {
+ for (int i = 0; map->l_initfini[i] != NULL; i++)
+ {
+ struct link_map *dep = map->l_initfini[i];
+ if (dep->l_visited == 0
+ && dep->l_main_map == 0)
+ dfs_traversal (rpo, dep, do_reldeps);
+ }
+ }
+
+ if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL))
+ {
+ /* Indicate that we encountered relocation dependencies during
+ traversal. */
+ *do_reldeps = true;
+
+ for (int m = map->l_reldeps->act - 1; m >= 0; m--)
+ {
+ struct link_map *dep = map->l_reldeps->list[m];
+ if (dep->l_visited == 0
+ && dep->l_main_map == 0)
+ dfs_traversal (rpo, dep, do_reldeps);
+ }
+ }
+
+ *rpo -= 1;
+ **rpo = map;
+}
+
+/* Topologically sort array MAPS according to dependencies of the contained
+ objects. */
+
+static void
+_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
+ unsigned int skip __attribute__ ((unused)), bool for_fini)
+{
+ for (int i = nmaps - 1; i >= 0; i--)
+ maps[i]->l_visited = 0;
+
+ /* We apply DFS traversal for each of maps[i] until the whole total order
+ is found and we're at the start of the Reverse-Postorder (RPO) sequence,
+ which is a topological sort.
+
+ We go from maps[nmaps - 1] backwards towards maps[0] at this level.
+ Due to the breadth-first search (BFS) ordering we receive, going
+ backwards usually gives a more shallow depth-first recursion depth,
+ adding more stack usage safety. Also, combined with the natural
+ processing order of l_initfini[] at each node during DFS, this maintains
+ an ordering closer to the original link ordering in the sorting results
+ under most simpler cases.
+
+ Another reason we order the top level backwards, it that maps[0] is
+ usually exactly the main object of which we're in the midst of
+ _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still
+ blank. If we start the traversal from maps[0], since having no
+ dependencies yet filled in, maps[0] will always be immediately
+ incorrectly placed at the last place in the order (first in reverse).
+ Adjusting the order so that maps[0] is last traversed naturally avoids
+ this problem.
+
+ Further, the old "optimization" of skipping the main object at maps[0]
+ from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general
+ no longer valid, since traversing along object dependency-links
+ may "find" the main object even when it is not included in the initial
+ order (e.g. a dlopen()'ed shared object can have circular dependencies
+ linked back to itself). In such a case, traversing N-1 objects will
+ create a N-object result, and raise problems.
+
+ To summarize, just passing in the full list, and iterating from back
+ to front makes things much more straightforward. */
+
+ /* Array to hold RPO sorting results, before we copy back to maps[]. */
+ struct link_map *rpo[nmaps];
+
+ /* The 'head' position during each DFS iteration. Note that we start at
+ one past the last element due to first-decrement-then-store (see the
+ bottom of above dfs_traversal() routine). */
+ struct link_map **rpo_head = &rpo[nmaps];
+
+ bool do_reldeps = false;
+ bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL);
+
+ for (int i = nmaps - 1; i >= 0; i--)
+ {
+ dfs_traversal (&rpo_head, maps[i], do_reldeps_ref);
+
+ /* We can break early if all objects are already placed. */
+ if (rpo_head == rpo)
+ goto end;
+ }
+ assert (rpo_head == rpo);
+
+ end:
+ /* Here we may do a second pass of sorting, using only l_initfini[]
+ static dependency links. This is avoided if !FOR_FINI or if we didn't
+ find any reldeps in the first DFS traversal.
+
+ The reason we do this is: while it is unspecified how circular
+ dependencies should be handled, the presumed reasonable behavior is to
+ have destructors to respect static dependency links as much as possible,
+ overriding reldeps if needed. And the first sorting pass, which takes
+ l_initfini/l_reldeps links equally, may not preserve this priority.
+
+ Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account
+ (see how the do_reldeps argument to dfs_traversal() is NULL below). */
+ if (do_reldeps)
+ {
+ for (int i = nmaps - 1; i >= 0; i--)
+ rpo[i]->l_visited = 0;
+
+ struct link_map **maps_head = &maps[nmaps];
+ for (int i = nmaps - 1; i >= 0; i--)
+ {
+ dfs_traversal (&maps_head, rpo[i], NULL);
+
+ /* We can break early if all objects are already placed.
+ The below memcpy is not needed in the do_reldeps case here,
+ since we wrote back to maps[] during DFS traversal. */
+ if (maps_head == maps)
+ return;
+ }
+ assert (maps_head == maps);
+ return;
+ }
+
+ memcpy (maps, rpo, sizeof (struct link_map *) * nmaps);
+}
+
+void
+_dl_sort_maps_init (void)
+{
+ int32_t algorithm = TUNABLE_GET (glibc, rtld, dynamic_sort, int32_t, NULL);
+ GLRO(dl_dso_sort_algo) = algorithm == 1 ? dso_sort_algorithm_original
+ : dso_sort_algorithm_dfs;
+}
+
+void
+_dl_sort_maps (struct link_map **maps, unsigned int nmaps,
+ unsigned int skip, bool for_fini)
+{
+ /* It can be tempting to use a static function pointer to store and call
+ the current selected sorting algorithm routine, but experimentation
+ shows that current processors still do not handle indirect branches
+ that efficiently, plus a static function pointer will involve
+ PTR_MANGLE/DEMANGLE, further impairing performance of small, common
+ input cases. A simple if-case with direct function calls appears to
+ be the fastest. */
+ if (__glibc_likely (GLRO(dl_dso_sort_algo) == dso_sort_algorithm_original))
+ _dl_sort_maps_original (maps, nmaps, skip, for_fini);
+ else
+ _dl_sort_maps_dfs (maps, nmaps, skip, for_fini);
+}
+
+#endif /* HAVE_TUNABLES. */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index e9943e889ef447ad..ae03aec9764e29d3 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -155,6 +155,8 @@ size_t _dl_phnum;
uint64_t _dl_hwcap __attribute__ ((nocommon));
uint64_t _dl_hwcap2 __attribute__ ((nocommon));
+enum dso_sort_algorithm _dl_dso_sort_algo;
+
/* The value of the FPU control word the kernel will preset in hardware. */
fpu_control_t _dl_fpu_control = _FPU_DEFAULT;
diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
index 998c5d52bcab8193..4e8a986541fc4c09 100644
--- a/elf/dl-sysdep.c
+++ b/elf/dl-sysdep.c
@@ -223,6 +223,9 @@ _dl_sysdep_start (void **start_argptr,
__tunables_init (_environ);
+ /* Initialize DSO sorting algorithm after tunables. */
+ _dl_sort_maps_init ();
+
#ifdef DL_SYSDEP_INIT
DL_SYSDEP_INIT;
#endif
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 6408a8e5ae92d2c6..54ef2a921310b229 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -140,4 +140,13 @@ glibc {
default: 512
}
}
+
+ rtld {
+ dynamic_sort {
+ type: INT_32
+ minval: 1
+ maxval: 2
+ default: 1
+ }
+ }
}
diff --git a/elf/dso-sort-tests-1.def b/elf/dso-sort-tests-1.def
index 873ddf55d91155c6..5f7f18ef270bc12d 100644
--- a/elf/dso-sort-tests-1.def
+++ b/elf/dso-sort-tests-1.def
@@ -62,5 +62,5 @@ output: b>a>{}<a<b
# The below expected outputs are what the two algorithms currently produce
# respectively, for regression testing purposes.
tst-bz15311: {+a;+e;+f;+g;+d;%d;-d;-g;-f;-e;-a};a->b->c->d;d=>[ba];c=>a;b=>e=>a;c=>f=>b;d=>g=>c
-xfail_output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<c<d<g<f<b<e];}
+output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<c<d<g<f<b<e];}
output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<g<f<a<b<c<d<e];}
diff --git a/elf/rtld.c b/elf/rtld.c
index b47e84ca2fb6f03c..cd2cc4024a3581c2 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1453,6 +1453,9 @@ dl_main (const ElfW(Phdr) *phdr,
main_map->l_name = (char *) "";
*user_entry = main_map->l_entry;
+ /* Set bit indicating this is the main program map. */
+ main_map->l_main_map = 1;
+
#ifdef HAVE_AUX_VECTOR
/* Adjust the on-stack auxiliary vector so that it looks like the
binary was executed directly. */
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 4f3f7ee4e30a2b42..118afc271057afd4 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -10,5 +10,6 @@ glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.rtld.dynamic_sort: 1 (min: 1, max: 2)
glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+)
diff --git a/include/link.h b/include/link.h
index dd491989beb41353..041ff5f753a9ee11 100644
--- a/include/link.h
+++ b/include/link.h
@@ -181,6 +181,11 @@ struct link_map
unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */
unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */
unsigned int l_reserved:2; /* Reserved for internal use. */
+ unsigned int l_main_map:1; /* Nonzero for the map of the main program. */
+ unsigned int l_visited:1; /* Used internally for map dependency
+ graph traversal. */
+ unsigned int l_map_used:1; /* These two bits are used during traversal */
+ unsigned int l_map_done:1; /* of maps in _dl_close_worker. */
unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed
to by `l_phdr' is allocated. */
unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 43272cf885d1e3e6..c3f96cdc85208926 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -303,6 +303,17 @@ changed once allocated at process startup. The default allocation of
optional static TLS is 512 bytes and is allocated in every thread.
@end deftp
+@deftp Tunable glibc.rtld.dynamic_sort
+Sets the algorithm to use for DSO sorting, valid values are @samp{1} and
+@samp{2}. For value of @samp{1}, an older O(n^3) algorithm is used, which is
+long time tested, but may have performance issues when dependencies between
+shared objects contain cycles due to circular dependencies. When set to the
+value of @samp{2}, a different algorithm is used, which implements a
+topological sort through depth-first search, and does not exhibit the
+performance issues of @samp{1}.
+
+The default value of this tunable is @samp{1}.
+@end deftp
@node Elision Tunables
@section Elision Tunables
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 5e56550a4d556fa7..9f09a4a280396659 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -240,6 +240,13 @@ enum allowmask
};
+/* DSO sort algorithm to use (check dl-sort-maps.c). */
+enum dso_sort_algorithm
+ {
+ dso_sort_algorithm_original,
+ dso_sort_algorithm_dfs
+ };
+
struct audit_ifaces
{
void (*activity) (uintptr_t *, unsigned int);
@@ -633,6 +640,8 @@ struct rtld_global_ro
platforms. */
EXTERN uint64_t _dl_hwcap2;
+ EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
+
#ifdef SHARED
/* We add a function table to _rtld_global which is then used to
call the function instead of going through the PLT. The result
@@ -1049,7 +1058,7 @@ extern void _dl_fini (void) attribute_hidden;
/* Sort array MAPS according to dependencies of the contained objects. */
extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps,
- char *used, bool for_fini) attribute_hidden;
+ unsigned int skip, bool for_fini) attribute_hidden;
/* The dynamic linker calls this function before and having changing
any shared object mappings. The `r_state' member of `struct r_debug'
@@ -1167,6 +1176,9 @@ extern struct link_map * _dl_get_dl_main_map (void)
# endif
#endif
+/* Initialize the DSO sort algorithm to use. */
+extern void _dl_sort_maps_init (void) attribute_hidden;
+
/* Initialization of libpthread for statically linked applications.
If libpthread is not linked in, this is an empty function. */
void __pthread_initialize_minimal (void) weak_function;

View File

@ -0,0 +1,25 @@
commit d3bf2f5927d51258a51ac7fde04f4805f8ee294a
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Wed Nov 3 09:19:30 2021 -0300
elf: Do not run DSO sorting if tunables is not enabled
Since the argorithm selection requires tunables.
Checked on x86_64-linux-gnu with --enable-tunables=no.
diff --git a/elf/Makefile b/elf/Makefile
index e92f62f279566684..3b5e1f59e6696a2b 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -998,8 +998,10 @@ include $(objpfx)$(1).generated-makefile
endef
# Generate from each testcase description file
+ifeq (yes,$(have-tunables))
$(eval $(call include_dsosort_tests,dso-sort-tests-1.def))
$(eval $(call include_dsosort_tests,dso-sort-tests-2.def))
+endif
check-abi: $(objpfx)check-abi-ld.out
tests-special += $(objpfx)check-abi-ld.out

View File

@ -0,0 +1,45 @@
commit 1f67d8286b5da9266a138198ef1f15c27cbb0010
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Mon Nov 15 16:28:39 2021 -0800
elf: Use a temporary file to generate Makefile fragments [BZ #28550]
1. Use a temporary file to generate Makefile fragments for DSO sorting
tests and use -include on them.
2. Add Makefile fragments to postclean-generated so that a "make clean"
removes the autogenerated fragments and a subsequent "make" regenerates
them.
This partially fixes BZ #28550.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/Makefile b/elf/Makefile
index 3b5e1f59e6696a2b..22a8060f7d3bb1a1 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -986,6 +986,7 @@ tests-special += \
# tests-special
endif
+ifndef avoid-generated
# DSO sorting tests:
# The dso-ordering-test.py script generates testcase source files in $(objpfx),
# creating a $(objpfx)<testcase-name>-dir for each testcase, and creates a
@@ -993,9 +994,14 @@ endif
define include_dsosort_tests
$(objpfx)$(1).generated-makefile: $(1)
$(PYTHON) $(..)scripts/dso-ordering-test.py \
- --description-file $$< --objpfx $(objpfx) --output-makefile $$@
-include $(objpfx)$(1).generated-makefile
+ --description-file $$< --objpfx $(objpfx) --output-makefile $$@T
+ mv $$@T $$@
+-include $(objpfx)$(1).generated-makefile
endef
+endif
+
+postclean-generated += $(objpfx)/dso-sort-tests-2.generated-makefile \
+ $(objpfx)/dso-sort-tests-2.generated-makefile
# Generate from each testcase description file
ifeq (yes,$(have-tunables))

View File

@ -0,0 +1,49 @@
commit 0884724a95b60452ad483dbe086d237d02ba624d
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Dec 14 12:37:44 2021 +0100
elf: Use new dependency sorting algorithm by default
The default has to change eventually, and there are no known failures
that require a delay.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 54ef2a921310b229..f11ca5b3e8b09b43 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -146,7 +146,7 @@ glibc {
type: INT_32
minval: 1
maxval: 2
- default: 1
+ default: 2
}
}
}
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 118afc271057afd4..478ee8ab091685eb 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -10,6 +10,6 @@ glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
-glibc.rtld.dynamic_sort: 1 (min: 1, max: 2)
+glibc.rtld.dynamic_sort: 2 (min: 1, max: 2)
glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+)
diff --git a/manual/tunables.texi b/manual/tunables.texi
index c3f96cdc85208926..7b70e80391ee87f7 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -312,7 +312,7 @@ value of @samp{2}, a different algorithm is used, which implements a
topological sort through depth-first search, and does not exhibit the
performance issues of @samp{1}.
-The default value of this tunable is @samp{1}.
+The default value of this tunable is @samp{2}.
@end deftp
@node Elision Tunables

View File

@ -0,0 +1,357 @@
commit 3a0588ae48fb35384a6bd33f9b66403badfa1262
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue Feb 8 15:22:49 2022 -0300
elf: Fix DFS sorting algorithm for LD_TRACE_LOADED_OBJECTS with missing libraries (BZ #28868)
On _dl_map_object the underlying file is not opened in trace mode
(in other cases where the underlying file can't be opened,
_dl_map_object quits with an error). If there any missing libraries
being processed, they will not be considered on final nlist size
passed on _dl_sort_maps later in the function. And it is then used by
_dl_sort_maps_dfs on the stack allocated working maps:
222 /* Array to hold RPO sorting results, before we copy back to maps[]. */
223 struct link_map *rpo[nmaps];
224
225 /* The 'head' position during each DFS iteration. Note that we start at
226 one past the last element due to first-decrement-then-store (see the
227 bottom of above dfs_traversal() routine). */
228 struct link_map **rpo_head = &rpo[nmaps];
However while transversing the 'l_initfini' on dfs_traversal it will
still consider the l_faked maps and thus update rpo more times than the
allocated working 'rpo', overflowing the stack object.
As suggested in bugzilla, one option would be to avoid sorting the maps
for trace mode. However I think ignoring l_faked object does make
sense (there is one less constraint to call the sorting function), it
allows a slight less stack usage for trace, and it is slight simpler
solution.
The tests does trigger the stack overflow, however I tried to make
it more generic to check different scenarios or missing objects.
Checked on x86_64-linux-gnu.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Conflicts:
elf/Makefile
(differences in backported tests)
diff --git a/elf/Makefile b/elf/Makefile
index 22a8060f7d3bb1a1..634c3113227d64a6 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -584,6 +584,11 @@ modules-names = \
libmarkermod5-3 \
libmarkermod5-4 \
libmarkermod5-5 \
+ libtracemod1-1 \
+ libtracemod2-1 \
+ libtracemod3-1 \
+ libtracemod4-1 \
+ libtracemod5-1 \
ltglobmod1 \
ltglobmod2 \
neededobj1 \
@@ -983,6 +988,11 @@ tests-special += \
$(objpfx)tst-initorder2-cmp.out \
$(objpfx)tst-unused-dep-cmp.out \
$(objpfx)tst-unused-dep.out \
+ $(objpfx)tst-trace1.out \
+ $(objpfx)tst-trace2.out \
+ $(objpfx)tst-trace3.out \
+ $(objpfx)tst-trace4.out \
+ $(objpfx)tst-trace5.out \
# tests-special
endif
@@ -2619,6 +2629,51 @@ $(objpfx)tst-rtld-run-static.out: $(objpfx)/ldconfig
$(objpfx)tst-dlmopen-gethostbyname: $(libdl)
$(objpfx)tst-dlmopen-gethostbyname.out: $(objpfx)tst-dlmopen-gethostbyname-mod.so
+
+LDFLAGS-libtracemod1-1.so += -Wl,-soname,libtracemod1.so
+LDFLAGS-libtracemod2-1.so += -Wl,-soname,libtracemod2.so
+LDFLAGS-libtracemod3-1.so += -Wl,-soname,libtracemod3.so
+LDFLAGS-libtracemod4-1.so += -Wl,-soname,libtracemod4.so
+LDFLAGS-libtracemod5-1.so += -Wl,-soname,libtracemod5.so
+
+$(objpfx)libtracemod1-1.so: $(objpfx)libtracemod2-1.so \
+ $(objpfx)libtracemod3-1.so
+$(objpfx)libtracemod2-1.so: $(objpfx)libtracemod4-1.so \
+ $(objpfx)libtracemod5-1.so
+
+define libtracemod-x
+$(objpfx)libtracemod$(1)/libtracemod$(1).so: $(objpfx)libtracemod$(1)-1.so
+ $$(make-target-directory)
+ cp $$< $$@
+endef
+libtracemod-suffixes = 1 2 3 4 5
+$(foreach i,$(libtracemod-suffixes), $(eval $(call libtracemod-x,$(i))))
+
+define tst-trace-skeleton
+$(objpfx)tst-trace$(1).out: $(objpfx)libtracemod1/libtracemod1.so \
+ $(objpfx)libtracemod2/libtracemod2.so \
+ $(objpfx)libtracemod3/libtracemod3.so \
+ $(objpfx)libtracemod4/libtracemod4.so \
+ $(objpfx)libtracemod5/libtracemod5.so \
+ $(..)scripts/tst-ld-trace.py \
+ tst-trace$(1).exp
+ ${ $(PYTHON) $(..)scripts/tst-ld-trace.py \
+ "$(test-wrapper-env) $(elf-objpfx)$(rtld-installed-name) \
+ --library-path $(common-objpfx):$(strip $(2)) \
+ $(objpfx)libtracemod1/libtracemod1.so" tst-trace$(1).exp \
+ } > $$@; $$(evaluate-test)
+endef
+
+$(eval $(call tst-trace-skeleton,1,))
+$(eval $(call tst-trace-skeleton,2,\
+ $(objpfx)libtracemod2))
+$(eval $(call tst-trace-skeleton,3,\
+ $(objpfx)libtracemod2:$(objpfx)libtracemod3))
+$(eval $(call tst-trace-skeleton,4,\
+ $(objpfx)libtracemod2:$(objpfx)libtracemod3:$(objpfx)libtracemod4))
+$(eval $(call tst-trace-skeleton,5,\
+ $(objpfx)libtracemod2:$(objpfx)libtracemod3:$(objpfx)libtracemod4:$(objpfx)libtracemod5))
+
$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
$(objpfx)tst-audit-tlsdesc-mod2.so \
$(shared-thread-library)
diff --git a/elf/dl-deps.c b/elf/dl-deps.c
index 9365d54c8e03e5f4..9ff589c8562b2dd1 100644
--- a/elf/dl-deps.c
+++ b/elf/dl-deps.c
@@ -489,6 +489,8 @@ _dl_map_object_deps (struct link_map *map,
for (nlist = 0, runp = known; runp; runp = runp->next)
{
+ /* _dl_sort_maps ignores l_faked object, so it is safe to not consider
+ them for nlist. */
if (__builtin_expect (trace_mode, 0) && runp->map->l_faked)
/* This can happen when we trace the loading. */
--map->l_searchlist.r_nlist;
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
index 398a08f28c4d9ff1..99354dc08a010dd3 100644
--- a/elf/dl-sort-maps.c
+++ b/elf/dl-sort-maps.c
@@ -140,7 +140,9 @@ static void
dfs_traversal (struct link_map ***rpo, struct link_map *map,
bool *do_reldeps)
{
- if (map->l_visited)
+ /* _dl_map_object_deps ignores l_faked objects when calculating the
+ number of maps before calling _dl_sort_maps, ignore them as well. */
+ if (map->l_visited || map->l_faked)
return;
map->l_visited = 1;
diff --git a/elf/libtracemod1-1.c b/elf/libtracemod1-1.c
new file mode 100644
index 0000000000000000..7c89c9a5a40b9668
--- /dev/null
+++ b/elf/libtracemod1-1.c
@@ -0,0 +1 @@
+/* Empty */
diff --git a/elf/libtracemod2-1.c b/elf/libtracemod2-1.c
new file mode 100644
index 0000000000000000..7c89c9a5a40b9668
--- /dev/null
+++ b/elf/libtracemod2-1.c
@@ -0,0 +1 @@
+/* Empty */
diff --git a/elf/libtracemod3-1.c b/elf/libtracemod3-1.c
new file mode 100644
index 0000000000000000..7c89c9a5a40b9668
--- /dev/null
+++ b/elf/libtracemod3-1.c
@@ -0,0 +1 @@
+/* Empty */
diff --git a/elf/libtracemod4-1.c b/elf/libtracemod4-1.c
new file mode 100644
index 0000000000000000..7c89c9a5a40b9668
--- /dev/null
+++ b/elf/libtracemod4-1.c
@@ -0,0 +1 @@
+/* Empty */
diff --git a/elf/libtracemod5-1.c b/elf/libtracemod5-1.c
new file mode 100644
index 0000000000000000..7c89c9a5a40b9668
--- /dev/null
+++ b/elf/libtracemod5-1.c
@@ -0,0 +1 @@
+/* Empty */
diff --git a/elf/tst-trace1.exp b/elf/tst-trace1.exp
new file mode 100644
index 0000000000000000..4a6f5211a68fe2c8
--- /dev/null
+++ b/elf/tst-trace1.exp
@@ -0,0 +1,4 @@
+ld 1
+libc 1
+libtracemod2.so 0
+libtracemod3.so 0
diff --git a/elf/tst-trace2.exp b/elf/tst-trace2.exp
new file mode 100644
index 0000000000000000..e13506e2eb9aeca2
--- /dev/null
+++ b/elf/tst-trace2.exp
@@ -0,0 +1,6 @@
+ld 1
+libc 1
+libtracemod2.so 1
+libtracemod3.so 0
+libtracemod4.so 0
+libtracemod5.so 0
diff --git a/elf/tst-trace3.exp b/elf/tst-trace3.exp
new file mode 100644
index 0000000000000000..e574549d12a53d72
--- /dev/null
+++ b/elf/tst-trace3.exp
@@ -0,0 +1,6 @@
+ld 1
+libc 1
+libtracemod2.so 1
+libtracemod3.so 1
+libtracemod4.so 0
+libtracemod5.so 0
diff --git a/elf/tst-trace4.exp b/elf/tst-trace4.exp
new file mode 100644
index 0000000000000000..31ca97b35bde0009
--- /dev/null
+++ b/elf/tst-trace4.exp
@@ -0,0 +1,6 @@
+ld 1
+libc 1
+libtracemod2.so 1
+libtracemod3.so 1
+libtracemod4.so 1
+libtracemod5.so 0
diff --git a/elf/tst-trace5.exp b/elf/tst-trace5.exp
new file mode 100644
index 0000000000000000..5d7d95372656396f
--- /dev/null
+++ b/elf/tst-trace5.exp
@@ -0,0 +1,6 @@
+ld 1
+libc 1
+libtracemod2.so 1
+libtracemod3.so 1
+libtracemod4.so 1
+libtracemod5.so 1
diff --git a/scripts/tst-ld-trace.py b/scripts/tst-ld-trace.py
new file mode 100755
index 0000000000000000..f5a402800377f44b
--- /dev/null
+++ b/scripts/tst-ld-trace.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python3
+# Dump the output of LD_TRACE_LOADED_OBJECTS in architecture neutral format.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import os
+import subprocess
+import sys
+
+try:
+ subprocess.run
+except:
+ class _CompletedProcess:
+ def __init__(self, args, returncode, stdout=None, stderr=None):
+ self.args = args
+ self.returncode = returncode
+ self.stdout = stdout
+ self.stderr = stderr
+
+ def _run(*popenargs, input=None, timeout=None, check=False, **kwargs):
+ assert(timeout is None)
+ with subprocess.Popen(*popenargs, **kwargs) as process:
+ try:
+ stdout, stderr = process.communicate(input)
+ except:
+ process.kill()
+ process.wait()
+ raise
+ returncode = process.poll()
+ if check and returncode:
+ raise subprocess.CalledProcessError(returncode, popenargs)
+ return _CompletedProcess(popenargs, returncode, stdout, stderr)
+
+ subprocess.run = _run
+
+def is_vdso(lib):
+ return lib.startswith('linux-gate') or lib.startswith('linux-vdso')
+
+
+def parse_trace(cmd, fref):
+ new_env = os.environ.copy()
+ new_env['LD_TRACE_LOADED_OBJECTS'] = '1'
+ trace_out = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
+ universal_newlines=True, env=new_env).stdout
+ trace = []
+ for line in trace_out.splitlines():
+ line = line.strip()
+ if is_vdso(line):
+ continue
+ fields = line.split('=>' if '=>' in line else ' ')
+ lib = os.path.basename(fields[0].strip())
+ if lib.startswith('ld'):
+ lib = 'ld'
+ elif lib.startswith('libc'):
+ lib = 'libc'
+ found = 1 if fields[1].strip() != 'not found' else 0
+ trace += ['{} {}'.format(lib, found)]
+ trace = sorted(trace)
+
+ reference = sorted(line.replace('\n','') for line in fref.readlines())
+
+ ret = 0 if trace == reference else 1
+ if ret != 0:
+ for i in reference:
+ if i not in trace:
+ print("Only in {}: {}".format(fref.name, i))
+ for i in trace:
+ if i not in reference:
+ print("Only in trace: {}".format(i))
+
+ sys.exit(ret)
+
+
+def get_parser():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('command',
+ help='comand to run')
+ parser.add_argument('reference',
+ help='reference file to compare')
+ return parser
+
+
+def main(argv):
+ parser = get_parser()
+ opts = parser.parse_args(argv)
+ with open(opts.reference, 'r') as fref:
+ # Remove the initial 'env' command.
+ parse_trace(opts.command.split()[1:], fref)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])

View File

@ -0,0 +1,36 @@
commit a2211c76c3b994099fd58a06d6072d7495d699cd
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Mar 18 18:18:35 2022 +0100
scripts/dso-ordering-test.py: Fix C&P error in * callrefs processing
The elf/dso-sort-tests-src subdirectory is not changed by this commit,
so it seems that the cut-and-paste error was not material.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
index bde0406be9da14fc..ee476c810c76f1b0 100644
--- a/scripts/dso-ordering-test.py
+++ b/scripts/dso-ordering-test.py
@@ -551,17 +551,17 @@ def process_testcase(t):
if obj in t.deps:
deps = t.deps[obj]
if '*' in deps:
- t.deps[obj].remove('*')
+ deps.remove('*')
t.add_deps([obj], non_dep_tgt_objs)
if obj in t.callrefs:
deps = t.callrefs[obj]
if '*' in deps:
- t.deps[obj].remove('*')
+ deps.remove('*')
t.add_callrefs([obj], non_dep_tgt_objs)
if "#" in t.deps:
deps = t.deps["#"]
if '*' in deps:
- t.deps["#"].remove('*')
+ deps.remove('*')
t.add_deps(["#"], non_dep_tgt_objs)
# If no main program was specified in dependency description, make a

View File

@ -0,0 +1,37 @@
commit 183d99737298bb3200f0610fdcd1c7549c8ed560
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Sep 6 07:38:10 2022 +0200
scripts/dso-ordering-test.py: Generate program run-time dependencies
The main program needs to depend on all shared objects, even objects
that have link-time dependencies among shared objects. Filtering
out shared objects that already have an link-time dependencies is not
necessary here; make will do this automatically.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/scripts/dso-ordering-test.py b/scripts/dso-ordering-test.py
index ee476c810c76f1b0..43b5ec4d920ad6a3 100644
--- a/scripts/dso-ordering-test.py
+++ b/scripts/dso-ordering-test.py
@@ -707,13 +707,12 @@ def process_testcase(t):
"\t$(compile.c) $(OUTPUT_OPTION)\n")
makefile.write (rule)
- not_depended_objs = find_objs_not_depended_on(test_descr)
- if not_depended_objs:
- depstr = ""
- for dep in not_depended_objs:
- depstr += (" $(objpfx)" + test_subdir + "/"
- + test_name + "-" + dep + ".so")
- makefile.write("$(objpfx)%s.out:%s\n" % (base_test_name, depstr))
+ # Ensure that all shared objects are built before running the
+ # test, whether there link-time dependencies or not.
+ depobjs = ["$(objpfx){}/{}-{}.so".format(test_subdir, test_name, dep)
+ for dep in test_descr.objs]
+ makefile.write("$(objpfx){}.out: {}\n".format(
+ base_test_name, " ".join(depobjs)))
# Add main executable to test-srcs
makefile.write("test-srcs += %s/%s\n" % (test_subdir, test_name))

View File

@ -1,6 +1,6 @@
%define glibcsrcdir glibc-2.28
%define glibcversion 2.28
%define glibcrelease 220%{?dist}
%define glibcrelease 221%{?dist}
# Pre-release tarballs are pulled in from git using a command that is
# effectively:
#
@ -1012,6 +1012,18 @@ Patch819: glibc-rh2109510-23.patch
Patch820: glibc-rh2139875-1.patch
Patch821: glibc-rh2139875-2.patch
Patch822: glibc-rh2139875-3.patch
Patch823: glibc-rh1159809-1.patch
Patch824: glibc-rh1159809-2.patch
Patch825: glibc-rh1159809-3.patch
Patch826: glibc-rh1159809-4.patch
Patch827: glibc-rh1159809-5.patch
Patch828: glibc-rh1159809-6.patch
Patch829: glibc-rh1159809-7.patch
Patch830: glibc-rh1159809-8.patch
Patch831: glibc-rh1159809-9.patch
Patch832: glibc-rh1159809-10.patch
Patch833: glibc-rh1159809-11.patch
Patch834: glibc-rh1159809-12.patch
##############################################################################
# Continued list of core "glibc" package information:
@ -2842,6 +2854,9 @@ fi
%files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
%changelog
* Fri Nov 18 2022 Florian Weimer <fweimer@redhat.com> - 2.28-221
- Switch to fast DSO dependency sorting algorithm (#1159809)
* Thu Nov 3 2022 Florian Weimer <fweimer@redhat.com> - 2.28-220
- Explicitly switch to --with-default-link=no (#2109510)
- Define MAP_SYNC on ppc64le (#2139875)