import CS glibc-2.28-236.el8.1

Andrew Lukoshko 2023-10-03 11:34:49 +00:00
parent 7db8dcfedc
commit 3f52bf0a36
23 changed files with 2758 additions and 3 deletions

@@ -0,0 +1,121 @@
commit 969e9733c7d17edf1e239a73fa172f357561f440
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Feb 21 09:20:28 2023 +0100
gshadow: Matching sgetsgent, sgetsgent_r ERANGE handling (bug 30151)
Before this change, sgetsgent_r did not set errno to ERANGE, but
sgetsgent only checks errno, not the return value from sgetsgent_r.
Consequently, sgetsgent did not detect any error, and reported
success to the caller, without initializing the struct sgrp object
whose address was returned.
This commit changes sgetsgent_r to set errno as well. This avoids
similar issues in applications which only check errno.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
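As a caller-side illustration of what the fix enables (a minimal sketch,
not part of the patch; the input line is made up), a NULL return from
sgetsgent can now be disambiguated through errno:

```c
#include <errno.h>
#include <gshadow.h>
#include <stdio.h>

int
main (void)
{
  errno = 0;
  struct sgrp *sg = sgetsgent ("grp:pw::member1,member2");
  if (sg == NULL)
    {
      if (errno == ERANGE)
        fprintf (stderr, "entry too large for the internal buffer\n");
      else
        fprintf (stderr, "sgetsgent failed\n");
      return 1;
    }
  printf ("group: %s, first member: %s\n", sg->sg_namp, sg->sg_mem[0]);
  return 0;
}
```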
diff --git a/gshadow/Makefile b/gshadow/Makefile
index 796fbbf473..a95524593a 100644
--- a/gshadow/Makefile
+++ b/gshadow/Makefile
@@ -26,7 +26,7 @@ headers = gshadow.h
routines = getsgent getsgnam sgetsgent fgetsgent putsgent \
getsgent_r getsgnam_r sgetsgent_r fgetsgent_r
-tests = tst-gshadow tst-putsgent tst-fgetsgent_r
+tests = tst-gshadow tst-putsgent tst-fgetsgent_r tst-sgetsgent
CFLAGS-getsgent_r.c += -fexceptions
CFLAGS-getsgent.c += -fexceptions
diff --git a/gshadow/sgetsgent_r.c b/gshadow/sgetsgent_r.c
index ea085e91d7..c75624e1f7 100644
--- a/gshadow/sgetsgent_r.c
+++ b/gshadow/sgetsgent_r.c
@@ -61,7 +61,10 @@ __sgetsgent_r (const char *string, struct sgrp *resbuf, char *buffer,
buffer[buflen - 1] = '\0';
sp = strncpy (buffer, string, buflen);
if (buffer[buflen - 1] != '\0')
- return ERANGE;
+ {
+ __set_errno (ERANGE);
+ return ERANGE;
+ }
}
else
sp = (char *) string;
diff --git a/gshadow/tst-sgetsgent.c b/gshadow/tst-sgetsgent.c
new file mode 100644
index 0000000000..0370c10fd0
--- /dev/null
+++ b/gshadow/tst-sgetsgent.c
@@ -0,0 +1,69 @@
+/* Test large input for sgetsgent (bug 30151).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gshadow.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xmemstream.h>
+#include <stdlib.h>
+
+static int
+do_test (void)
+{
+ /* Create a shadow group with 1000 members. */
+ struct xmemstream mem;
+ xopen_memstream (&mem);
+ const char *passwd = "k+zD0nucwfxAo3sw1NXUj6K5vt5M16+X0TVGdE1uFvq5R8V7efJ";
+ fprintf (mem.out, "group-name:%s::m0", passwd);
+ for (int i = 1; i < 1000; ++i)
+ fprintf (mem.out, ",m%d", i);
+ xfclose_memstream (&mem);
+
+ /* Call sgetsgent. */
+ char *input = mem.buffer;
+ struct sgrp *e = sgetsgent (input);
+ TEST_VERIFY_EXIT (e != NULL);
+ TEST_COMPARE_STRING (e->sg_namp, "group-name");
+ TEST_COMPARE_STRING (e->sg_passwd, passwd);
+ /* No administrators. */
+ TEST_COMPARE_STRING (e->sg_adm[0], NULL);
+ /* Check the members list. */
+ for (int i = 0; i < 1000; ++i)
+ {
+ char *member = xasprintf ("m%d", i);
+ TEST_COMPARE_STRING (e->sg_mem[i], member);
+ free (member);
+ }
+ TEST_COMPARE_STRING (e->sg_mem[1000], NULL);
+
+ /* Check that putsgent brings back the input string. */
+ xopen_memstream (&mem);
+ TEST_COMPARE (putsgent (e, mem.out), 0);
+ xfclose_memstream (&mem);
+ /* Compare without the trailing '\n' that putsgent added. */
+ TEST_COMPARE (mem.buffer[mem.length - 1], '\n');
+ mem.buffer[mem.length - 1] = '\0';
+ TEST_COMPARE_STRING (mem.buffer, input);
+
+ free (mem.buffer);
+ free (input);
+ return 0;
+}
+
+#include <support/test-driver.c>

@@ -0,0 +1,144 @@
From 436a604b7dc741fc76b5a6704c6cd8bb178518e7 Mon Sep 17 00:00:00 2001
From: Adam Yi <ayi@janestreet.com>
Date: Tue, 7 Mar 2023 07:30:02 -0500
Subject: posix: Fix system blocks SIGCHLD erroneously [BZ #30163]
Fix a bug where SIGCHLD is erroneously blocked forever in the following
scenario:
1. Thread A calls system but hasn't returned yet
2. Thread B calls another system but returns
SIGCHLD would be blocked forever in thread B after its system() returns,
even after the system() in thread A returns.
Although POSIX does not require it, the glibc system implementation aims
to be thread-safe and cancellation-safe. This bug was introduced in
5fb7fc96350575c9adb1316833e48ca11553be49, when we moved reverting the
signal mask to happen when the last concurrently running system returns,
even though the signal mask is per-thread. This commit reverts that
logic and adds a test.
Signed-off-by: Adam Yi <ayi@janestreet.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
[DJ: Edited to use integer sleep() instead of nanosleep() dependency rabbit hole]
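The scenario above can be reproduced outside the test suite with a small
standalone program (a sketch mirroring the test added below; compile with
-pthread on a glibc system):

```c
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* Run system (), then verify SIGCHLD is not left blocked in this thread.  */
static void *
run_system (void *cmd)
{
  system ((const char *) cmd);
  sigset_t set;
  pthread_sigmask (SIG_BLOCK, NULL, &set);      /* query, do not modify */
  if (sigismember (&set, SIGCHLD))
    fprintf (stderr, "BUG: SIGCHLD still blocked after system ()\n");
  return NULL;
}

int
main (void)
{
  pthread_t a, b;
  pthread_create (&a, NULL, run_system, (void *) "sleep 2");
  pthread_create (&b, NULL, run_system, (void *) "sleep 1");
  pthread_join (b, NULL);   /* thread B returns while A's system () runs */
  pthread_join (a, NULL);
  return 0;
}
```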
diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c
index 634acfe264..47a0afe6bf 100644
--- a/stdlib/tst-system.c
+++ b/stdlib/tst-system.c
@@ -25,6 +25,7 @@
#include <support/check.h>
#include <support/temp_file.h>
#include <support/support.h>
+#include <support/xthread.h>
#include <support/xunistd.h>
static char *tmpdir;
@@ -71,6 +72,20 @@ call_system (void *closure)
}
}
+static void *
+sleep_and_check_sigchld (void *closure)
+{
+ double *seconds = (double *) closure;
+ char cmd[namemax];
+ sprintf (cmd, "sleep %lf" , *seconds);
+ TEST_COMPARE (system (cmd), 0);
+
+ sigset_t blocked = {0};
+ TEST_COMPARE (sigprocmask (SIG_BLOCK, NULL, &blocked), 0);
+ TEST_COMPARE (sigismember (&blocked, SIGCHLD), 0);
+ return NULL;
+}
+
static int
do_test (void)
{
@@ -154,6 +169,17 @@ do_test (void)
xchmod (_PATH_BSHELL, st.st_mode);
}
+ {
+ pthread_t long_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 2 });
+ pthread_t short_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 1 });
+ xpthread_join (short_sleep_thread);
+ xpthread_join (long_sleep_thread);
+ }
+
TEST_COMPARE (system (""), 0);
return 0;
diff --git a/support/shell-container.c b/support/shell-container.c
index ffa3378b5e..b1f9e793c1 100644
--- a/support/shell-container.c
+++ b/support/shell-container.c
@@ -169,6 +170,31 @@ kill_func (char **argv)
return 0;
}
+/* Emulate the "/bin/sleep" command. No suffix support. Options are
+ ignored. */
+static int
+sleep_func (char **argv)
+{
+ if (argv[0] == NULL)
+ {
+ fprintf (stderr, "sleep: missing operand\n");
+ return 1;
+ }
+ char *endptr = NULL;
+ long sec = strtol (argv[0], &endptr, 0);
+ if (endptr == argv[0] || errno == ERANGE || sec < 0)
+ {
+ fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
+ return 1;
+ }
+ if (sleep (sec) < 0)
+ {
+ fprintf (stderr, "sleep: failed to nanosleep\n");
+ return 1;
+ }
+ return 0;
+}
+
/* This is a list of all the built-in commands we understand. */
static struct {
const char *name;
@@ -179,6 +206,7 @@ static struct {
{ "cp", copy_func },
{ "exit", exit_func },
{ "kill", kill_func },
+ { "sleep", sleep_func },
{ NULL, NULL }
};
diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
index 2335a99184..d77720a625 100644
--- a/sysdeps/posix/system.c
+++ b/sysdeps/posix/system.c
@@ -179,16 +179,16 @@ do_system (const char *line)
as if the shell had terminated using _exit(127). */
status = W_EXITCODE (127, 0);
+ /* sigaction can not fail with SIGINT/SIGQUIT used with old
+ disposition. Same applies for sigprocmask. */
DO_LOCK ();
if (SUB_REF () == 0)
{
- /* sigaction can not fail with SIGINT/SIGQUIT used with old
- disposition. Same applies for sigprocmask. */
__sigaction (SIGINT, &intr, NULL);
__sigaction (SIGQUIT, &quit, NULL);
- __sigprocmask (SIG_SETMASK, &omask, NULL);
}
DO_UNLOCK ();
+ __sigprocmask (SIG_SETMASK, &omask, NULL);
if (ret != 0)
__set_errno (ret);

@@ -0,0 +1,27 @@
From d03094649d39949a30513bf3ffb03a28fecbccd8 Mon Sep 17 00:00:00 2001
From: Adam Yi <ayi@janestreet.com>
Date: Wed, 8 Mar 2023 03:11:47 -0500
Subject: hurd: fix build of tst-system.c
We made tst-system.c depend on pthread, but that requires linking with
$(shared-thread-library). It does not fail under Linux because the
variable expands to nothing there, but it fails for Hurd.
I verified via cross-compiling that "make check" now works for Hurd.
Signed-off-by: Adam Yi <ayi@janestreet.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
[DJ: Edited for RHEL 8]
diff -rup a/stdlib/Makefile b/stdlib/Makefile
--- a/stdlib/Makefile 2023-07-07 00:44:55.810981644 -0400
+++ b/stdlib/Makefile 2023-07-07 00:46:47.541411091 -0400
@@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-threa
LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
LDLIBS-test-on_exit-race = $(shared-thread-library)
+LDLIBS-tst-system = $(shared-thread-library)
LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)

@@ -0,0 +1,42 @@
This patch is a RHEL-only patch which modifies the custom changes
in the previous patches in this series to make the test case look
more like the upstream test case.
diff -rup a/stdlib/tst-system.c b/stdlib/tst-system.c
--- a/stdlib/tst-system.c 2023-07-10 13:37:53.089505036 -0400
+++ b/stdlib/tst-system.c 2023-07-10 14:04:03.922610279 -0400
@@ -173,10 +173,10 @@ do_test (void)
{
pthread_t long_sleep_thread = xpthread_create (NULL,
sleep_and_check_sigchld,
- &(double) { 2 });
+ &(double) { 0.2 });
pthread_t short_sleep_thread = xpthread_create (NULL,
sleep_and_check_sigchld,
- &(double) { 1 });
+ &(double) { 0.1 });
xpthread_join (short_sleep_thread);
xpthread_join (long_sleep_thread);
}
diff -rup a/support/shell-container.c b/support/shell-container.c
--- a/support/shell-container.c 2023-07-10 13:37:53.089505036 -0400
+++ b/support/shell-container.c 2023-07-10 14:03:20.392920627 -0400
@@ -182,15 +182,15 @@ sleep_func (char **argv)
return 1;
}
char *endptr = NULL;
- long sec = strtol (argv[0], &endptr, 0);
+ double sec = strtod (argv[0], &endptr);
if (endptr == argv[0] || errno == ERANGE || sec < 0)
{
fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
return 1;
}
- if (sleep (sec) < 0)
+ if (usleep ((useconds_t)(sec * 1000000.0)) < 0)
{
- fprintf (stderr, "sleep: failed to nanosleep\n");
+ fprintf (stderr, "sleep: failed to usleep: %s\n", strerror (errno));
return 1;
}
return 0;

@@ -0,0 +1,70 @@
commit 801af9fafd4689337ebf27260aa115335a0cb2bc
Author: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Date: Sat Feb 4 14:41:38 2023 +0300
gmon: Fix allocated buffer overflow (bug 29444)
The `__monstartup()` allocates a buffer used to store all the data
accumulated by the monitor.
The size of this buffer depends on the size of the internal structures
used and the address range for which the monitor is activated, as well
as on the maximum density of call instructions and/or callable functions
that could be potentially on a segment of executable code.
In particular a hash table of arcs is placed at the end of this buffer.
The size of this hash table is calculated in bytes as
p->fromssize = p->textsize / HASHFRACTION;
but actually should be
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
This results in writing beyond the end of the allocated buffer when an
added arc corresponds to a call near the end of the monitored address
range, since `_mcount()` checks the incoming caller address against the
monitored range, but not the intermediate hash-like index that it uses
to write into the table.
It should be noted that when the results are output to `gmon.out`, the
table is read up to the last element calculated from the allocated size
in bytes, so arcs stored outside the buffer boundary did not reach
`gprof` for analysis. Thus this "feature" helped me find this bug while
working on https://sourceware.org/bugzilla/show_bug.cgi?id=29438
Just in case, I will explicitly note that the problem breaks the
`make test t=gmon/tst-gmon-dso` added for Bug 29438.
There, the arc of the `f3()` call disappears from the output, since in
the DSO case, the call to `f3` is located close to the end of the
monitored range.
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Another minor error appears to be a related typo in the calculation of
`kcountsize`; but since kcounts are smaller than froms, that rounding
actually serves to align the p->froms data.
Co-authored-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
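The arithmetic is easy to check in isolation. The following sketch uses
illustrative values and a generic ROUNDUP (glibc's own macro assumes a
power-of-two alignment) to show how the unrounded fromssize falls one
element short of what the hash index for the last in-range address needs:

```c
#include <stdio.h>

/* Generic round-up, for illustration only.  */
#define ROUNDUP(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int
main (void)
{
  unsigned long textsize = 11;      /* size of monitored text, in bytes */
  unsigned long hashfraction = 2;   /* HASHFRACTION */
  unsigned long fromsize = 2;       /* sizeof (*p->froms) */

  unsigned long old_size = textsize / hashfraction;       /* 5 bytes */
  unsigned long new_size = ROUNDUP (old_size, fromsize);  /* 6 bytes */

  /* _mcount writes froms[(pc - lowpc) / (hashfraction * fromsize)];
     the last in-range offset (textsize - 1) selects entry 2, whose
     storage ends at byte 6 -- past the 5 bytes formerly allocated.  */
  unsigned long last_entry = (textsize - 1) / (hashfraction * fromsize);
  printf ("old: %lu bytes, new: %lu bytes, last entry ends at byte %lu\n",
          old_size, new_size, (last_entry + 1) * fromsize);
  return 0;
}
```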
diff --git a/gmon/gmon.c b/gmon/gmon.c
index dee64803ada583d7..bf76358d5b1aa2da 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -132,6 +132,8 @@ __monstartup (u_long lowpc, u_long highpc)
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
+ /* This looks like a typo, but it's here to align the p->froms
+ section. */
p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->froms));
p->hashfraction = HASHFRACTION;
p->log_hashfraction = -1;
@@ -142,7 +144,7 @@ __monstartup (u_long lowpc, u_long highpc)
instead of integer division. Precompute shift amount. */
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
}
- p->fromssize = p->textsize / HASHFRACTION;
+ p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
if (p->tolimit < MINARCS)
p->tolimit = MINARCS;

@@ -0,0 +1,477 @@
This patch adds the required @order directives to preserve the
GLIBC_PRIVATE ABI.
commit 31be941e4367c001b2009308839db5c67bf9dcbc
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 20:12:13 2023 +1100
gmon: improve mcount overflow handling [BZ# 27576]
When mcount overflows, no gmon.out file is generated, but no message is printed
to the user, leaving the user with no idea why, and thinking maybe there is
some bug - which is how BZ 27576 ended up being logged. Print a message to
stderr in this case so the user knows what is going on.
As a comment in sys/gmon.h acknowledges, the hardcoded MAXARCS value is too
small for some large applications, including the test case in that BZ. Rather
than increase it, add tunables to enable MINARCS and MAXARCS to be overridden
at runtime (glibc.gmon.minarcs and glibc.gmon.maxarcs). So if a user gets the
mcount overflow error, they can try increasing maxarcs (they might need to
increase minarcs too if the heuristic is wrong in their case.)
Note setting minarcs/maxarcs too large can cause monstartup to fail with an
out of memory error. If you set them large enough, it can cause an integer
overflow in calculating the buffer size. I haven't done anything to defend
against that - it would not generally be a security vulnerability, since these
tunables will be ignored in suid/sgid programs (due to the SXID_ERASE default),
and if you can set GLIBC_TUNABLES in the environment of a process, you can take
it over anyway (LD_PRELOAD, LD_LIBRARY_PATH, etc). I thought about modifying
the code of monstartup to defend against integer overflows, but doing so is
complicated, and I realise the existing code is susceptible to them even prior
to this change (e.g. try passing a pathologically large highpc argument to
monstartup), so I decided just to leave that possibility in-place.
Add a test case which demonstrates mcount overflow and the tunables.
Document the new tunables in the manual.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
manual/tunables.texi
(missing tunables downstream)
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index f11ca5b3e8b09b43..dc2999796042dbaf 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -149,4 +149,17 @@ glibc {
default: 2
}
}
+
+ gmon {
+ minarcs {
+ type: INT_32
+ minval: 50
+ default: 50
+ }
+ maxarcs {
+ type: INT_32
+ minval: 50
+ default: 1048576
+ }
+ }
}
diff --git a/gmon/Makefile b/gmon/Makefile
index d94593c9d8a882eb..54f05894d4dd8c4a 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -25,7 +25,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon
+tests = tst-sprofil tst-gmon tst-mcount-overflow
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -56,6 +56,18 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-gmon-gprof.out
endif
+CFLAGS-tst-mcount-overflow.c := -fno-omit-frame-pointer -pg
+tst-mcount-overflow-no-pie = yes
+CRT-tst-mcount-overflow := $(csu-objpfx)g$(start-installed-name)
+# Intentionally use invalid config where maxarcs<minarcs to check warning is printed
+tst-mcount-overflow-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcount-overflow.data \
+ GLIBC_TUNABLES=glibc.gmon.minarcs=51:glibc.gmon.maxarcs=50
+# Send stderr into output file because we make sure expected messages are printed
+tst-mcount-overflow-ARGS := 2>&1 1>/dev/null | cat
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcount-overflow-check.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -103,6 +115,14 @@ $(objpfx)tst-gmon.out: clean-tst-gmon-data
clean-tst-gmon-data:
rm -f $(objpfx)tst-gmon.data.*
+$(objpfx)tst-mcount-overflow.o: clean-tst-mcount-overflow-data
+clean-tst-mcount-overflow-data:
+ rm -f $(objpfx)tst-mcount-overflow.data.*
+
+$(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)tst-mcount-overflow.out
+ $(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
+ $(evaluate-test)
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index bf76358d5b1aa2da..689bf80141e559ca 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -46,6 +46,11 @@
#include <libc-internal.h>
#include <not-cancel.h>
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE gmon
+# include <elf/dl-tunables.h>
+#endif
+
#ifdef PIC
# include <link.h>
@@ -124,6 +129,22 @@ __monstartup (u_long lowpc, u_long highpc)
int o;
char *cp;
struct gmonparam *p = &_gmonparam;
+ long int minarcs, maxarcs;
+
+#if HAVE_TUNABLES
+ /* Read minarcs/maxarcs tunables. */
+ minarcs = TUNABLE_GET (minarcs, int32_t, NULL);
+ maxarcs = TUNABLE_GET (maxarcs, int32_t, NULL);
+ if (maxarcs < minarcs)
+ {
+ ERR("monstartup: maxarcs < minarcs, setting maxarcs = minarcs\n");
+ maxarcs = minarcs;
+ }
+#else
+ /* No tunables, we use hardcoded defaults */
+ minarcs = MINARCS;
+ maxarcs = MAXARCS;
+#endif
/*
* round lowpc and highpc to multiples of the density we're using
@@ -146,10 +167,10 @@ __monstartup (u_long lowpc, u_long highpc)
}
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
- if (p->tolimit < MINARCS)
- p->tolimit = MINARCS;
- else if (p->tolimit > MAXARCS)
- p->tolimit = MAXARCS;
+ if (p->tolimit < minarcs)
+ p->tolimit = minarcs;
+ else if (p->tolimit > maxarcs)
+ p->tolimit = maxarcs;
p->tossize = p->tolimit * sizeof(struct tostruct);
cp = calloc (p->kcountsize + p->fromssize + p->tossize, 1);
diff --git a/gmon/mcount.c b/gmon/mcount.c
index 9d4a1a50fa6ab21a..f7180fdb83399a14 100644
--- a/gmon/mcount.c
+++ b/gmon/mcount.c
@@ -41,6 +41,10 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#include <atomic.h>
+#include <not-cancel.h>
+#include <unistd.h>
+#define ERR(s) __write_nocancel (STDERR_FILENO, s, sizeof (s) - 1)
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -170,6 +174,7 @@ done:
return;
overflow:
p->state = GMON_PROF_ERROR;
+ ERR("mcount: call graph buffer size limit exceeded, gmon.out will not be generated\n");
return;
}
diff --git a/gmon/sys/gmon.h b/gmon/sys/gmon.h
index b4cc3b043a2aec77..af0582a3717085b5 100644
--- a/gmon/sys/gmon.h
+++ b/gmon/sys/gmon.h
@@ -111,6 +111,8 @@ extern struct __bb *__bb_head;
* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
+ *
+ * Value can be overridden at runtime by glibc.gmon.minarcs tunable.
*/
#define MINARCS 50
@@ -124,8 +126,8 @@ extern struct __bb *__bb_head;
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
- * The old value of 1<<16 wasn't high enough in practice for large C++
- * programs; will 1<<20 be adequate for long? FIXME
+ *
+ * Value can be overridden at runtime by glibc.gmon.maxarcs tunable.
*/
#define MAXARCS (1 << 20)
diff --git a/gmon/tst-mcount-overflow-check.sh b/gmon/tst-mcount-overflow-check.sh
new file mode 100644
index 0000000000000000..27eb5538fd573a6e
--- /dev/null
+++ b/gmon/tst-mcount-overflow-check.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Test expected messages generated when mcount overflows
+# Copyright (C) 2017-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+LC_ALL=C
+export LC_ALL
+set -e
+exec 2>&1
+
+program="$1"
+
+check_msg() {
+ if ! grep -q "$1" "$program.out"; then
+ echo "FAIL: expected message not in output: $1"
+ exit 1
+ fi
+}
+
+check_msg 'monstartup: maxarcs < minarcs, setting maxarcs = minarcs'
+check_msg 'mcount: call graph buffer size limit exceeded, gmon.out will not be generated'
+
+for data_file in $1.data.*; do
+ if [ -f "$data_file" ]; then
+ echo "FAIL: expected no data files, but found $data_file"
+ exit 1
+ fi
+done
+
+echo PASS
diff --git a/gmon/tst-mcount-overflow.c b/gmon/tst-mcount-overflow.c
new file mode 100644
index 0000000000000000..06cc93ef872eb7c1
--- /dev/null
+++ b/gmon/tst-mcount-overflow.c
@@ -0,0 +1,72 @@
+/* Test program to trigger mcount overflow in profiling collection.
+ Copyright (C) 2017-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Program with sufficiently complex, yet pointless, call graph
+ that it will trigger an mcount overflow, when you set the
+ minarcs/maxarcs tunables to very low values. */
+
+#define PREVENT_TAIL_CALL asm volatile ("")
+
+/* Calls REP(n) macro 16 times, for n=0..15.
+ * You need to define REP(n) before using this.
+ */
+#define REPS \
+ REP(0) REP(1) REP(2) REP(3) REP(4) REP(5) REP(6) REP(7) \
+ REP(8) REP(9) REP(10) REP(11) REP(12) REP(13) REP(14) REP(15)
+
+/* Defines 16 leaf functions named f1_0 to f1_15 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void f1_##n (void) {};
+REPS
+#undef REP
+
+/* Calls all 16 leaf functions f1_* in succession */
+__attribute__ ((noinline, noclone, weak)) void
+f2 (void)
+{
+# define REP(n) f1_##n();
+ REPS
+# undef REP
+ PREVENT_TAIL_CALL;
+}
+
+/* Defines 16 functions named f2_0 to f2_15, which all just call f2 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void \
+ f2_##n (void) { f2(); PREVENT_TAIL_CALL; };
+REPS
+#undef REP
+
+__attribute__ ((noinline, noclone, weak)) void
+f3 (int count)
+{
+ for (int i = 0; i < count; ++i)
+ {
+ /* Calls f1_0(), f2_0(), f1_1(), f2_1(), f3_0(), etc */
+# define REP(n) f1_##n(); f2_##n();
+ REPS
+# undef REP
+ }
+}
+
+int
+main (void)
+{
+ f3 (1000);
+ return 0;
+}
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 7b70e80391ee87f7..00eafcf44b562b9e 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -73,6 +73,9 @@ glibc.malloc.check: 0 (min: 0, max: 3)
* Elision Tunables:: Tunables in elision subsystem
* Hardware Capability Tunables:: Tunables that modify the hardware
capabilities seen by @theglibc{}
+* gmon Tunables:: Tunables that control the gmon profiler, used in
+ conjunction with gprof
+
@end menu
@node Tunable names
@@ -506,3 +509,59 @@ instead.
This tunable is specific to i386 and x86-64.
@end deftp
+
+@node gmon Tunables
+@section gmon Tunables
+@cindex gmon tunables
+
+@deftp {Tunable namespace} glibc.gmon
+This tunable namespace affects the behaviour of the gmon profiler.
+gmon is a component of @theglibc{} which is normally used in
+conjunction with gprof.
+
+When GCC compiles a program with the @code{-pg} option, it instruments
+the program with calls to the @code{mcount} function, to record the
+program's call graph. At program startup, a memory buffer is allocated
+to store this call graph; the size of the buffer is calculated using a
+heuristic based on code size. If during execution, the buffer is found
+to be too small, profiling will be aborted and no @file{gmon.out} file
+will be produced. In that case, you will see the following message
+printed to standard error:
+
+@example
+mcount: call graph buffer size limit exceeded, gmon.out will not be generated
+@end example
+
+Most of the symbols discussed in this section are defined in the header
+@code{sys/gmon.h}. However, some symbols (for example @code{mcount})
+are not defined in any header file, since they are only intended to be
+called from code generated by the compiler.
+@end deftp
+
+@deftp Tunable glibc.gmon.minarcs
+The heuristic for sizing the call graph buffer is known to be
+insufficient for small programs; hence, the calculated value is clamped
+to be at least a minimum size. The default minimum (in units of
+call graph entries, @code{struct tostruct}), is given by the macro
+@code{MINARCS}. If you have some program with an unusually complex
+call graph, for which the heuristic fails to allocate enough space,
+you can use this tunable to increase the minimum to a larger value.
+@end deftp
+
+@deftp Tunable glibc.gmon.maxarcs
+To prevent excessive memory consumption when profiling very large
+programs, the call graph buffer is allowed to have a maximum of
+@code{MAXARCS} entries. For some very large programs, the default
+value of @code{MAXARCS} defined in @file{sys/gmon.h} is too small; in
+that case, you can use this tunable to increase it.
+
+Note the value of the @code{maxarcs} tunable must be greater or equal
+to that of the @code{minarcs} tunable; if this constraint is violated,
+a warning will be printed to standard error at program startup, and
+the @code{minarcs} value will be used as the maximum as well.
+
+Setting either tunable too high may result in a call graph buffer
+whose size exceeds the available memory; in that case, an out of memory
+error will be printed at program startup, the profiler will be
+disabled, and no @file{gmon.out} file will be generated.
+@end deftp
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
index 5c3c5292025607a1..265f82ef2be42fd0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
@@ -24,3 +24,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/i386/dl-tunables.list b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
--- a/sysdeps/unix/sysv/linux/i386/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
@@ -31,3 +31,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
index ee1e6fca95e1f2da..c8bb1a8ec0283ac8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
@@ -24,3 +24,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
index 099e28d8f8e67944..85b3a014ffcadc45 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
@@ -23,3 +23,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
@@ -31,3 +31,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs

@@ -0,0 +1,191 @@
commit bde121872001d8f3224eeafa5b7effb871c3fbca
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 08:58:02 2023 +1100
gmon: fix memory corruption issues [BZ# 30101]
V2 of this patch fixes an issue in V1, where the state was changed to ON,
not OFF, at the end of _mcleanup. I hadn't noticed that (counterintuitively)
ON=0 and OFF=3, hence zeroing the buffer turned it back on. So set the
state to OFF after the memset.
1. Prevent double free, and reads from unallocated memory, when
_mcleanup is (incorrectly) called two or more times in a row,
without an intervening call to __monstartup; with this patch, the
second and subsequent calls effectively become no-ops instead.
While setting tos=NULL is the minimal fix, the safest action is to zero
the whole gmonparam buffer.
2. Prevent memory leak when __monstartup is (incorrectly) called two
or more times in a row, without an intervening call to _mcleanup;
with this patch, the second and subsequent calls effectively become
no-ops instead.
3. After _mcleanup, treat __moncontrol(1) as __moncontrol(0) instead.
With zeroing of gmonparam buffer in _mcleanup, this stops the
state incorrectly being changed to GMON_PROF_ON despite profiling
actually being off. If we'd just done the minimal fix to _mcleanup
of setting tos=NULL, there is risk of far worse memory corruption:
kcount would point to deallocated memory, and the __profil syscall
would make the kernel write profiling data into that memory,
which could have since been reallocated to something unrelated.
4. Ensure __moncontrol(0) still turns off profiling even in error
state. Otherwise, if mcount overflows and sets state to
GMON_PROF_ERROR, when _mcleanup calls __moncontrol(0), the __profil
syscall to disable profiling will not be invoked. _mcleanup will
free the buffer, but the kernel will still be writing profiling
data into it, potentially corrupting arbitrary memory.
Also adds a test case for (1). Issues (2)-(4) are not feasible to test.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
gmon/Makefile
(copyright year update)
diff --git a/gmon/Makefile b/gmon/Makefile
index 54f05894d4dd8c4a..1bc4ad6e14e292a9 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -1,4 +1,5 @@
-# Copyright (C) 1995-2018 Free Software Foundation, Inc.
+# Copyright (C) 1995-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -25,7 +26,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon tst-mcount-overflow
+tests = tst-sprofil tst-gmon tst-mcount-overflow tst-mcleanup
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -68,6 +69,14 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-mcount-overflow-check.out
endif
+CFLAGS-tst-mcleanup.c := -fno-omit-frame-pointer -pg
+tst-mcleanup-no-pie = yes
+CRT-tst-mcleanup := $(csu-objpfx)g$(start-installed-name)
+tst-mcleanup-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcleanup.data
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcleanup.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -123,6 +132,10 @@ $(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)ts
$(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
$(evaluate-test)
+$(objpfx)tst-mcleanup.out: clean-tst-mcleanup-data
+clean-tst-mcleanup-data:
+ rm -f $(objpfx)tst-mcleanup.data.*
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index 689bf80141e559ca..5e99a7351dc71666 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -102,11 +102,8 @@ __moncontrol (int mode)
{
struct gmonparam *p = &_gmonparam;
- /* Don't change the state if we ran into an error. */
- if (p->state == GMON_PROF_ERROR)
- return;
-
- if (mode)
+ /* Treat start request as stop if error or gmon not initialized. */
+ if (mode && p->state != GMON_PROF_ERROR && p->tos != NULL)
{
/* start */
__profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
@@ -116,7 +113,9 @@ __moncontrol (int mode)
{
/* stop */
__profil(NULL, 0, 0, 0);
- p->state = GMON_PROF_OFF;
+ /* Don't change the state if we ran into an error. */
+ if (p->state != GMON_PROF_ERROR)
+ p->state = GMON_PROF_OFF;
}
}
libc_hidden_def (__moncontrol)
@@ -146,6 +145,14 @@ __monstartup (u_long lowpc, u_long highpc)
maxarcs = MAXARCS;
#endif
+ /*
+ * If we are incorrectly called twice in a row (without an
+ * intervening call to _mcleanup), ignore the second call to
+ * prevent leaking memory.
+ */
+ if (p->tos != NULL)
+ return;
+
/*
* round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
@@ -463,9 +470,14 @@ _mcleanup (void)
{
__moncontrol (0);
- if (_gmonparam.state != GMON_PROF_ERROR)
+ if (_gmonparam.state != GMON_PROF_ERROR && _gmonparam.tos != NULL)
write_gmon ();
/* free the memory. */
free (_gmonparam.tos);
+
+ /* reset buffer to initial state for safety */
+ memset(&_gmonparam, 0, sizeof _gmonparam);
+ /* somewhat confusingly, ON=0, OFF=3 */
+ _gmonparam.state = GMON_PROF_OFF;
}
diff --git a/gmon/tst-mcleanup.c b/gmon/tst-mcleanup.c
new file mode 100644
index 0000000000000000..b259653ec833aca4
--- /dev/null
+++ b/gmon/tst-mcleanup.c
@@ -0,0 +1,31 @@
+/* Test program for repeated invocation of _mcleanup
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Intentionally calls _mcleanup() twice: once manually, it will be
+ called again as an atexit handler. This is incorrect use of the API,
+ but the point of the test is to make sure we don't crash when the
+ API is misused in this way. */
+
+#include <sys/gmon.h>
+
+int
+main (void)
+{
+ _mcleanup();
+ return 0;
+}

@@ -0,0 +1,216 @@
From af992e7abdc9049714da76cae1e5e18bc4838fb8 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 7 Jun 2023 13:18:01 -0500
Subject: [PATCH] x86: Increase `non_temporal_threshold` to roughly `sizeof_L3
/ 4`
Content-type: text/plain; charset=UTF-8
Current `non_temporal_threshold` is set to roughly `3/4 * sizeof_L3 /
ncores_per_socket`. This patch updates that value to roughly
`sizeof_L3 / 4`.
The original value (specifically dividing by `ncores_per_socket`) was
done to limit the amount of other threads' data a `memcpy`/`memset`
could evict.
Dividing by `ncores_per_socket`, however, leads to exceedingly low
non-temporal thresholds and to using non-temporal stores in cases
where REP MOVSB is multiple times faster.
Furthermore, non-temporal stores are written directly to main memory,
so using them at a size much smaller than L3 can place soon-to-be-accessed
data much further away than it otherwise would be. As well,
modern machines are able to detect streaming patterns (especially if
REP MOVSB is used) and provide LRU hints to the memory subsystem. This
in effect caps the total amount of eviction at 1/cache_associativity,
far below meaningfully thrashing the entire cache.
As best I can tell, the benchmarks that led to this small threshold
were done comparing non-temporal stores against standard cacheable
stores. A better comparison (linked below) is against REP MOVSB, which,
on the measured systems, is nearly 2x faster than non-temporal stores
at the low end of the previous threshold, and within 10% for copies
over 100MB (well past even the current threshold). In cases with a
low number of threads competing for bandwidth, REP MOVSB is ~2x faster
up to `sizeof_L3`.
The divisor of `4` is a somewhat arbitrary value. From benchmarks it
seems Skylake and Icelake both prefer a divisor of `2`, but older CPUs
such as Broadwell prefer something closer to `8`. This patch is meant
to be followed up by another one to make the divisor cpu-specific, but
in the meantime (and for easier backporting), this patch settles on
`4` as a middle-ground.
Benchmarks comparing non-temporal stores, REP MOVSB, and cacheable
stores were done using:
https://github.com/goldsteinn/memcpy-nt-benchmarks
Sheets results (also available in pdf on the github):
https://docs.google.com/spreadsheets/d/e/2PACX-1vS183r0rW_jRX6tG_E90m9qVuFiMbRIJvi5VAE8yYOvEOIEEc3aSNuEsrFbuXw5c3nGboxMmrupZD7K/pubhtml
Reviewed-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 70 +++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 27 deletions(-)
[DJ - ported to C8S]
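For a sense of scale, a back-of-envelope sketch (illustrative numbers, not
glibc code: a 32 MiB shared L3 and 16 cores per socket) shows how far the
default threshold moves:

```c
#include <stdio.h>

int
main (void)
{
  unsigned long l3 = 32UL << 20;          /* 32 MiB shared L3 */
  unsigned long ncores_per_socket = 16;

  unsigned long old_thresh = l3 / ncores_per_socket * 3 / 4;  /* 1.5 MiB */
  unsigned long new_thresh = l3 / 4;                          /* 8 MiB */

  printf ("old: %lu KiB, new: %lu KiB\n",
          old_thresh >> 10, new_thresh >> 10);
  return 0;
}
```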
diff -rup a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
--- a/sysdeps/x86/cacheinfo.h 2023-08-08 11:54:09.969791421 -0400
+++ b/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
@@ -46,7 +46,7 @@ long int __x86_rep_movsb_threshold attri
long int __x86_rep_stosb_threshold attribute_hidden = 2048;
static void
-get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
+get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr,
long int core)
{
unsigned int eax;
@@ -65,6 +65,7 @@ get_common_cache_info (long int *shared_
unsigned int family = cpu_features->basic.family;
unsigned int model = cpu_features->basic.model;
long int shared = *shared_ptr;
+ long int shared_per_thread = *shared_per_thread_ptr;
unsigned int threads = *threads_ptr;
bool inclusive_cache = true;
bool support_count_mask = true;
@@ -80,6 +81,7 @@ get_common_cache_info (long int *shared_
/* Try L2 otherwise. */
level = 2;
shared = core;
+ shared_per_thread = core;
threads_l2 = 0;
threads_l3 = -1;
}
@@ -236,29 +238,28 @@ get_common_cache_info (long int *shared_
}
else
{
-intel_bug_no_cache_info:
- /* Assume that all logical threads share the highest cache
- level. */
- threads
- = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx
- >> 16) & 0xff);
- }
-
- /* Cap usage of highest cache level to the number of supported
- threads. */
- if (shared > 0 && threads > 0)
- shared /= threads;
+ intel_bug_no_cache_info:
+ /* Assume that all logical threads share the highest cache
+ level. */
+ threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
+ & 0xff);
+
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
+ }
}
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
if (threads_l2 > 0)
- core /= threads_l2;
+ shared_per_thread += core / threads_l2;
shared += core;
}
*shared_ptr = shared;
+ *shared_per_thread_ptr = shared_per_thread;
*threads_ptr = threads;
}
@@ -272,6 +273,7 @@ init_cacheinfo (void)
int max_cpuid_ex;
long int data = -1;
long int shared = -1;
+ long int shared_per_thread = -1;
long int core;
unsigned int threads = 0;
const struct cpu_features *cpu_features = __get_cpu_features ();
@@ -287,22 +289,25 @@ init_cacheinfo (void)
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+ shared_per_thread = shared;
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
{
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
+ shared_per_thread = shared;
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+ shared_per_thread = shared;
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
@@ -352,6 +357,9 @@ init_cacheinfo (void)
shared += core;
}
}
+
+ if (shared_per_thread <= 0)
+ shared_per_thread = shared;
}
if (cpu_features->data_cache_size != 0)
@@ -380,20 +388,30 @@ init_cacheinfo (void)
__x86_shared_cache_size = shared;
}
- /* The default setting for the non_temporal threshold is 3/4 of one
- thread's share of the chip's cache. For most Intel and AMD processors
- with an initial release date between 2017 and 2020, a thread's typical
- share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
- threshold leaves 125 KBytes to 500 KBytes of the thread's data
- in cache after a maximum temporal copy, which will maintain
- in cache a reasonable portion of the thread's stack and other
- active data. If the threshold is set higher than one thread's
- share of the cache, it has a substantial risk of negatively
- impacting the performance of other threads running on the chip. */
+ /* The default setting for the non_temporal threshold is 1/4 of size
+ of the chip's cache. For most Intel and AMD processors with an
+ initial release date between 2017 and 2023, a thread's typical
+ share of the cache is from 18-64MB. Using the 1/4 L3 is meant to
+ estimate the point where non-temporal stores begin out-competing
+ REP MOVSB. As well the point where the fact that non-temporal
+ stores are forced back to main memory would already occurred to the
+ majority of the lines in the copy. Note, concerns about the
+ entire L3 cache being evicted by the copy are mostly alleviated
+ by the fact that modern HW detects streaming patterns and
+ provides proper LRU hints so that the maximum thrashing
+ capped at 1/associativity. */
+ unsigned long int non_temporal_threshold = shared / 4;
+ /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
+ a higher risk of actually thrashing the cache as they don't have a HW LRU
+ hint. As well, their performance in highly parallel situations is
+ noticeably worse. */
+ if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ non_temporal_threshold = shared_per_thread * 3 / 4;
+
__x86_shared_non_temporal_threshold
= (cpu_features->non_temporal_threshold != 0
? cpu_features->non_temporal_threshold
- : __x86_shared_cache_size * 3 / 4);
+ : non_temporal_threshold);
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
unsigned int minimum_rep_movsb_threshold;

@@ -0,0 +1,47 @@
From 47f747217811db35854ea06741be3685e8bbd44d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 17 Jul 2023 23:14:33 -0500
Subject: [PATCH] x86: Fix slight bug in `shared_per_thread` cache size
calculation.
Content-type: text/plain; charset=UTF-8
After:
```
commit af992e7abdc9049714da76cae1e5e18bc4838fb8
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed Jun 7 13:18:01 2023 -0500
x86: Increase `non_temporal_threshold` to roughly `sizeof_L3 / 4`
```
Split `shared` (cumulative cache size) from `shared_per_thread` (cache
size per thread); the `shared_per_thread` value *can* be slightly off
from the previous calculation.
Previously we added `core` even if `threads_l2` was invalid, and only
used `threads_l2` to divide `core` if it was present. The changed
version only included `core` if `threads_l2` was valid.
This change restores the old behavior if `threads_l2` is invalid by
adding the entire value of `core`.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
[DJ - ported to C8S]
diff -rup b1/sysdeps/x86/cacheinfo.h b2/sysdeps/x86/cacheinfo.h
--- b1/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
+++ b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
@@ -253,8 +253,8 @@ get_common_cache_info (long int *shared_
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
- if (threads_l2 > 0)
- shared_per_thread += core / threads_l2;
+ long int core_per_thread = threads_l2 > 0 ? (core / threads_l2) : core;
+ shared_per_thread += core_per_thread;
shared += core;
}

@@ -0,0 +1,44 @@
From 8b9a0af8ca012217bf90d1dc0694f85b49ae09da Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Tue, 18 Jul 2023 10:27:59 -0500
Subject: [PATCH] [PATCH v1] x86: Use `3/4*sizeof(per-thread-L3)` as low bound
for NT threshold.
Content-type: text/plain; charset=UTF-8
On some machines we end up with incomplete cache information. This can
make the new calculation of `sizeof(total-L3)/custom-divisor` end up
lower than intended (and lower than the prior value). So reintroduce
the old bound as a lower bound to avoid potentially regressing code
where we don't have complete information to make the decision.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
[DJ - ported to C8S]
diff -rup b2/sysdeps/x86/cacheinfo.h b3/sysdeps/x86/cacheinfo.h
--- b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
+++ b3/sysdeps/x86/cacheinfo.h 2023-08-08 13:59:14.507988958 -0400
@@ -401,12 +401,20 @@ init_cacheinfo (void)
provides proper LRU hints so that the maximum thrashing
capped at 1/associativity. */
unsigned long int non_temporal_threshold = shared / 4;
+ /* If the computed non_temporal_threshold <= 3/4 * per-thread L3, we most
+ likely have incorrect/incomplete cache info in which case, default to
+ 3/4 * per-thread L3 to avoid regressions. */
+ unsigned long int non_temporal_threshold_lowbound
+ = shared_per_thread * 3 / 4;
+ if (non_temporal_threshold < non_temporal_threshold_lowbound)
+ non_temporal_threshold = non_temporal_threshold_lowbound;
+
/* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
a higher risk of actually thrashing the cache as they don't have a HW LRU
hint. As well, their performance in highly parallel situations is
noticeably worse. */
if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- non_temporal_threshold = shared_per_thread * 3 / 4;
+ non_temporal_threshold = non_temporal_threshold_lowbound;
__x86_shared_non_temporal_threshold
= (cpu_features->non_temporal_threshold != 0

@@ -0,0 +1,39 @@
Adjusted for backport to c8s by modifying sysdeps/x86/cacheinfo.h.
commit 885a7f0feee951f514a121788f46f33b2867110f
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri Aug 11 12:29:11 2023 -0500
x86: Fix incorrect scope of setting `shared_per_thread` [BZ# 30745]
The:
```
if (shared_per_thread > 0 && threads > 0)
shared_per_thread /= threads;
```
Code was accidentally moved to inside the else scope. This doesn't
match how it was previously (before af992e7abd).
This patch fixes that by putting the division after the `else` block.
diff --git a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
index 4dbfa979ef052eaa..e53fa25106c95253 100644
--- a/sysdeps/x86/cacheinfo.h
+++ b/sysdeps/x86/cacheinfo.h
@@ -243,11 +243,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
level. */
threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
& 0xff);
-
- /* Get per-thread size of highest level cache. */
- if (shared_per_thread > 0 && threads > 0)
- shared_per_thread /= threads;
}
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
}
/* Account for non-inclusive L2 and L3 caches. */

@@ -0,0 +1,35 @@
commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Apr 3 17:23:11 2023 +0200
x86_64: Fix asm constraints in feraiseexcept (bug 30305)
The divss instruction clobbers its first argument, and the constraints
need to reflect that. Fortunately, with GCC 12, generated code does
not actually change, so there is no externally visible bug.
Suggested-by: Jakub Jelinek <jakub@redhat.com>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
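The "+x" constraint marks the operand as read-write, so GCC knows divss
overwrites f instead of merely reading it. A quick sanity check of the
resulting behavior (a sketch, not part of the patch; link with -lm):

```c
#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  feclearexcept (FE_ALL_EXCEPT);
  feraiseexcept (FE_INVALID | FE_DIVBYZERO);
  printf ("FE_INVALID: %s, FE_DIVBYZERO: %s\n",
          fetestexcept (FE_INVALID) ? "raised" : "clear",
          fetestexcept (FE_DIVBYZERO) ? "raised" : "clear");
  return 0;
}
```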
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index ca1c223053bf016b..fb886ed540b52100 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -33,7 +33,7 @@ __feraiseexcept (int excepts)
/* One example of an invalid operation is 0.0 / 0.0. */
float f = 0.0;
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (f));
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (f));
(void) &f;
}
@@ -43,7 +43,7 @@ __feraiseexcept (int excepts)
float f = 1.0;
float g = 0.0;
- __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g));
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (f) : "x" (g));
(void) &f;
}

@@ -0,0 +1,28 @@
Apply a fix similar to upstream commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
to the installed header file. Upstream, the header file has been removed
in its uncorrected state, so there is no upstream fix to backport.
Suggested by Jakub Jelinek.
diff --git a/sysdeps/x86/fpu/bits/fenv.h b/sysdeps/x86/fpu/bits/fenv.h
index 4103982d8c8ae014..4ae2d2a04c6754bd 100644
--- a/sysdeps/x86/fpu/bits/fenv.h
+++ b/sysdeps/x86/fpu/bits/fenv.h
@@ -132,7 +132,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
float __f = 0.0;
# ifdef __SSE_MATH__
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (__f));
# else
__asm__ __volatile__ ("fdiv %%st, %%st(0); fwait"
: "=t" (__f) : "0" (__f));
@@ -145,7 +145,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
float __g = 0.0;
# ifdef __SSE_MATH__
- __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g));
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (__f) : "x" (__g));
# else
__asm__ __volatile__ ("fdivp %%st, %%st(1); fwait"
: "=t" (__f) : "0" (__f), "u" (__g) : "st(1)");

@@ -0,0 +1,46 @@
Only backport po/it.po and po/ja.po changes for the ESTALE message
translation which we use during CI testing.
commit 7ff33eca6860648fb909df954da4996ce853d01d
Author: Carlos O'Donell <carlos@redhat.com>
Date: Fri Jul 7 11:27:08 2023 -0400
Translations: Add new ro support and update others.
This brings in the new Romanian language translations, and updates
nine other translations. Important translations in this update
include the Italian and Japanese translations for ESTALE which
remove the mention of "NFS" from the error message translation.
diff --git a/po/it.po b/po/it.po
index 2750575a1082f1db..6c2be3a4df5611ff 100644
--- a/po/it.po
+++ b/po/it.po
@@ -6793,10 +6793,8 @@ msgstr "Quota disco superata"
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
#. TRANS and remounting the file system.
#: sysdeps/gnu/errlist.c:788
-#, fuzzy
-#| msgid "Stale NFS file handle"
msgid "Stale file handle"
-msgstr "Gestione del file NFS interrotta"
+msgstr "Riferimento al file obsoleto"
# lf
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
diff --git a/po/ja.po b/po/ja.po
index bd9b7ffbbd3e3bf6..8fb598c5edbc5891 100644
--- a/po/ja.po
+++ b/po/ja.po
@@ -6360,10 +6360,8 @@ msgstr "ディスク使用量制限を超過しました"
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
#. TRANS and remounting the file system.
#: sysdeps/gnu/errlist.c:788
-#, fuzzy
-#| msgid "Stale NFS file handle"
msgid "Stale file handle"
-msgstr "実効性のないNFSファイルハンドルです"
+msgstr "古いファイルハンドルです"
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
#. TRANS already specifies an NFS-mounted file.

@@ -0,0 +1,26 @@
From abcf8db7fa46b73fd5b8193ce11f9312301b84c7 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Wed, 7 Jun 2023 11:21:48 +0200
Subject: resolv_conf: release lock on allocation failure (bug 30527)
When the initial allocation of global fails, the local lock is left
locked.
Reported by Steffen Lammel of SAP HANA development.
diff --git a/resolv/resolv_conf.c b/resolv/resolv_conf.c
index bd5890773b..8bc9edc634 100644
--- a/resolv/resolv_conf.c
+++ b/resolv/resolv_conf.c
@@ -93,7 +93,10 @@ get_locked_global (void)
{
global_copy = calloc (1, sizeof (*global));
if (global_copy == NULL)
- return NULL;
+ {
+ __libc_lock_unlock (lock);
+ return NULL;
+ }
atomic_store_relaxed (&global, global_copy);
resolv_conf_array_init (&global_copy->array);
}

@@ -0,0 +1,27 @@
commit 0fda2a41baf7e978d07322aa278e964f4dce8802
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Jul 20 18:31:48 2023 +0200
debug: Mark libSegFault.so as NODELETE
The signal handler installed in the ELF constructor cannot easily
be removed again (because the program may have changed handlers
in the meantime). Mark the object as NODELETE so that the registered
handler function is never unloaded.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 23ee92deea4c99d0e6a5f48fa7b942909b123ec5)
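The same guarantee can also be requested at dlopen time: RTLD_NODELETE is
the runtime analogue of the -z nodelete link flag used below. A sketch
(assuming a glibc system; older glibc also needs -ldl at link time):

```c
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>

int
main (void)
{
  void *h = dlopen ("libSegFault.so", RTLD_NOW | RTLD_NODELETE);
  if (h == NULL)
    {
      fprintf (stderr, "dlopen: %s\n", dlerror ());
      return 1;
    }
  dlclose (h);   /* refcount drops, but the mapping is never removed */
  return 0;
}
```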
diff --git a/debug/Makefile b/debug/Makefile
index b0f0b7beb6d5cef5..8bce89ddcd0a61ed 100644
--- a/debug/Makefile
+++ b/debug/Makefile
@@ -213,6 +213,8 @@ extra-libs-others = $(extra-libs)
libSegFault-routines = segfault
libSegFault-inhibit-o = $(filter-out .os,$(object-suffixes))
+# libSegFault.so installs a signal handler in its ELF constructor.
+LDFLAGS-SegFault.so = -Wl,--enable-new-dtags,-z,nodelete
libpcprofile-routines = pcprofile
libpcprofile-inhibit-o = $(filter-out .os,$(object-suffixes))

@@ -0,0 +1,37 @@
commit 7b5bfe77836442b9aeb75cc520f0d1eb7f82be67
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon May 18 15:21:04 2020 +0200
elf: Assert that objects are relocated before their constructors run
If we try to run constructors before relocation, this is always
a dynamic linker bug. An assert is easier to notice than a call
via an invalid function pointer (which may not even produce a valid
call stack).
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/elf/dl-init.c b/elf/dl-init.c
index 45405cd0563845b4..99ce531d7b326f5f 100644
--- a/elf/dl-init.c
+++ b/elf/dl-init.c
@@ -16,6 +16,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <assert.h>
#include <stddef.h>
#include <ldsodefs.h>
@@ -27,6 +28,11 @@ typedef void (*init_t) (int, char **, char **);
static void
call_init (struct link_map *l, int argc, char **argv, char **env)
{
+ /* If the object has not been relocated, this is a bug. The
+ function pointers are invalid in this case. (Executables do not
+ need relocation, and neither do proxy objects.) */
+ assert (l->l_real->l_relocated || l->l_real->l_type == lt_executable);
+
if (l->l_init_called)
/* This object is all done. */
return;

View File

@@ -0,0 +1,237 @@
commit 6f360366f7f76b158a0f4bf20d42f2854ad56264
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Oct 27 11:36:44 2022 +0200
elf: Introduce _dl_call_fini
This consolidates the destructor invocations from _dl_fini and
dlclose. Remove the micro-optimization that avoids calling
_dl_call_fini if there are no destructors (as dlclose is quite
expensive anyway). The debug log message is now printed
unconditionally.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
elf/dl-fini.c
(Missing ELF_INITFINI support downstream.)
sysdeps/generic/ldsodefs.h
(Missing dl_init_t declaration downstream.)
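For orientation, the ordering the new helper implements is the ELF-mandated one: DT_FINI_ARRAY entries run from the end of the array to the start, followed by the legacy DT_FINI function. The same ordering is visible from plain application code; a small sketch (assuming GCC's default constructor/destructor placement):

/* fini-order.c -- build with: gcc fini-order.c -o fini-order
   The loader walks DT_INIT_ARRAY forward and DT_FINI_ARRAY
   backward, so destructors run in reverse registration order.  */
#include <stdio.h>

__attribute__ ((constructor)) static void c1 (void) { puts ("ctor 1"); }
__attribute__ ((constructor)) static void c2 (void) { puts ("ctor 2"); }
__attribute__ ((destructor)) static void d1 (void) { puts ("dtor 1"); }
__attribute__ ((destructor)) static void d2 (void) { puts ("dtor 2"); }

int
main (void)
{
  puts ("main");
  /* Expected output order: ctor 1, ctor 2, main, dtor 2, dtor 1.  */
  return 0;
}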
diff --git a/elf/Makefile b/elf/Makefile
index 634c3113227d64a6..040d82e243a80c0f 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -50,6 +50,7 @@ routines = \
# profiled libraries.
dl-routines = \
dl-call-libc-early-init \
+ dl-call_fini \
dl-close \
dl-debug \
dl-deps \
diff --git a/elf/dl-call_fini.c b/elf/dl-call_fini.c
new file mode 100644
index 0000000000000000..9e7ba10fa2a4df77
--- /dev/null
+++ b/elf/dl-call_fini.c
@@ -0,0 +1,50 @@
+/* Invoke DT_FINI and DT_FINI_ARRAY callbacks.
+ Copyright (C) 1996-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <sysdep.h>
+
+void
+_dl_call_fini (void *closure_map)
+{
+ struct link_map *map = closure_map;
+
+ /* When debugging print a message first. */
+ if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS))
+ _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n", map->l_name, map->l_ns);
+
+ /* Make sure nothing happens if we are called twice. */
+ map->l_init_called = 0;
+
+ ElfW(Dyn) *fini_array = map->l_info[DT_FINI_ARRAY];
+ if (fini_array != NULL)
+ {
+ ElfW(Addr) *array = (ElfW(Addr) *) (map->l_addr
+ + fini_array->d_un.d_ptr);
+ size_t sz = (map->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
+ / sizeof (ElfW(Addr)));
+
+ while (sz-- > 0)
+ ((fini_t) array[sz]) ();
+ }
+
+ /* Next try the old-style destructor. */
+ ElfW(Dyn) *fini = map->l_info[DT_FINI];
+ if (fini != NULL)
+ DL_CALL_DT_FINI (map, ((void *) map->l_addr + fini->d_un.d_ptr));
+}
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 22225efb3226c3e1..26ea51dfbadc5b85 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -35,11 +35,6 @@
#include <dl-unmap-segments.h>
-
-/* Type of the constructor functions. */
-typedef void (*fini_t) (void);
-
-
/* Special l_idx value used to indicate which objects remain loaded. */
#define IDX_STILL_USED -1
@@ -109,31 +104,6 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
return false;
}
-/* Invoke dstructors for CLOSURE (a struct link_map *). Called with
- exception handling temporarily disabled, to make errors fatal. */
-static void
-call_destructors (void *closure)
-{
- struct link_map *map = closure;
-
- if (map->l_info[DT_FINI_ARRAY] != NULL)
- {
- ElfW(Addr) *array =
- (ElfW(Addr) *) (map->l_addr
- + map->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
- unsigned int sz = (map->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
- / sizeof (ElfW(Addr)));
-
- while (sz-- > 0)
- ((fini_t) array[sz]) ();
- }
-
- /* Next try the old-style destructor. */
- if (map->l_info[DT_FINI] != NULL)
- DL_CALL_DT_FINI (map, ((void *) map->l_addr
- + map->l_info[DT_FINI]->d_un.d_ptr));
-}
-
void
_dl_close_worker (struct link_map *map, bool force)
{
@@ -279,17 +249,7 @@ _dl_close_worker (struct link_map *map, bool force)
half-cooked objects. Temporarily disable exception
handling, so that errors are fatal. */
if (imap->l_init_called)
- {
- /* When debugging print a message first. */
- if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS,
- 0))
- _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
- imap->l_name, nsid);
-
- if (imap->l_info[DT_FINI_ARRAY] != NULL
- || imap->l_info[DT_FINI] != NULL)
- _dl_catch_exception (NULL, call_destructors, imap);
- }
+ _dl_catch_exception (NULL, _dl_call_fini, imap);
#ifdef SHARED
/* Auditing checkpoint: we remove an object. */
diff --git a/elf/dl-fini.c b/elf/dl-fini.c
index e14259a3c8806e0d..2d34658d4c3a470c 100644
--- a/elf/dl-fini.c
+++ b/elf/dl-fini.c
@@ -20,11 +20,6 @@
#include <string.h>
#include <ldsodefs.h>
-
-/* Type of the constructor functions. */
-typedef void (*fini_t) (void);
-
-
void
_dl_fini (void)
{
@@ -115,38 +110,7 @@ _dl_fini (void)
if (l->l_init_called)
{
- /* Make sure nothing happens if we are called twice. */
- l->l_init_called = 0;
-
- /* Is there a destructor function? */
- if (l->l_info[DT_FINI_ARRAY] != NULL
- || l->l_info[DT_FINI] != NULL)
- {
- /* When debugging print a message first. */
- if (__builtin_expect (GLRO(dl_debug_mask)
- & DL_DEBUG_IMPCALLS, 0))
- _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
- DSO_FILENAME (l->l_name),
- ns);
-
- /* First see whether an array is given. */
- if (l->l_info[DT_FINI_ARRAY] != NULL)
- {
- ElfW(Addr) *array =
- (ElfW(Addr) *) (l->l_addr
- + l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
- unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
- / sizeof (ElfW(Addr)));
- while (i-- > 0)
- ((fini_t) array[i]) ();
- }
-
- /* Next try the old-style destructor. */
- if (l->l_info[DT_FINI] != NULL)
- DL_CALL_DT_FINI
- (l, l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr);
- }
-
+ _dl_call_fini (l);
#ifdef SHARED
/* Auditing checkpoint: another object closed. */
_dl_audit_objclose (l);
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 29bbde3e83e37d7e..0bad34d44a5685d9 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -93,6 +93,9 @@ typedef struct link_map *lookup_t;
: (__glibc_unlikely ((ref)->st_shndx == SHN_ABS) ? 0 \
: LOOKUP_VALUE_ADDRESS (map, map_set)) + (ref)->st_value)
+/* Type of a constructor function, in DT_FINI, DT_FINI_ARRAY. */
+typedef void (*fini_t) (void);
+
/* On some architectures a pointer to a function is not just a pointer
to the actual code of the function but rather an architecture
specific descriptor. */
@@ -1047,6 +1050,11 @@ extern void _dl_init (struct link_map *main_map, int argc, char **argv,
initializer functions have completed. */
extern void _dl_fini (void) attribute_hidden;
+/* Invoke the DT_FINI_ARRAY and DT_FINI destructors for MAP, which
+ must be a struct link_map *. Can be used as an argument to
+ _dl_catch_exception. */
+void _dl_call_fini (void *map) attribute_hidden;
+
/* Sort array MAPS according to dependencies of the contained objects.
If FORCE_FIRST, MAPS[0] keeps its place even if the dependencies
say otherwise. */

View File

@@ -0,0 +1,30 @@
commit f6c8204fd7fabf0cf4162eaf10ccf23258e4d10e
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Aug 22 13:56:25 2023 +0200
elf: Do not run constructors for proxy objects
Otherwise, the ld.so constructor runs for each audit namespace
and each dlmopen namespace.
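A proxy object is a link map whose l_real points at another namespace's real object; the ld.so entry that every dlmopen namespace shares with the base namespace is the common case. A sketch of how such namespaces come into existence (libm.so.6 is just an arbitrary example library):

/* new-ns.c -- build with: gcc new-ns.c -o new-ns -ldl
   Each LM_ID_NEWLM request creates a fresh namespace containing a
   proxy link map for ld.so; before this fix, the ld.so constructor
   ran once per such namespace instead of once per process.  */
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stdio.h>

int
main (void)
{
  void *h1 = dlmopen (LM_ID_NEWLM, "libm.so.6", RTLD_NOW);
  void *h2 = dlmopen (LM_ID_NEWLM, "libm.so.6", RTLD_NOW);
  printf ("two namespaces: %p and %p\n", h1, h2);
  return 0;
}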
diff --git a/elf/dl-init.c b/elf/dl-init.c
index 99ce531d7b326f5f..73c0259fbe6d19af 100644
--- a/elf/dl-init.c
+++ b/elf/dl-init.c
@@ -28,10 +28,14 @@ typedef void (*init_t) (int, char **, char **);
static void
call_init (struct link_map *l, int argc, char **argv, char **env)
{
+ /* Do not run constructors for proxy objects. */
+ if (l != l->l_real)
+ return;
+
/* If the object has not been relocated, this is a bug. The
function pointers are invalid in this case. (Executables do not
- need relocation, and neither do proxy objects.) */
- assert (l->l_real->l_relocated || l->l_real->l_type == lt_executable);
+ need relocation.) */
+ assert (l->l_relocated || l->l_type == lt_executable);
if (l->l_init_called)
/* This object is all done. */

View File

@@ -0,0 +1,637 @@
commit 6985865bc3ad5b23147ee73466583dd7fdf65892
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 8 12:32:14 2023 +0200
elf: Always call destructors in reverse constructor order (bug 30785)
The current implementation of dlclose (and process exit) re-sorts the
link maps before calling ELF destructors. Destructor order is not the
reverse of the constructor order as a result: The second sort takes
relocation dependencies into account, and other differences can result
from ambiguous inputs, such as cycles. (The force_first handling in
_dl_sort_maps is not effective for dlclose.) After the changes in
this commit, there is still a required difference due to
dlopen/dlclose ordering by the application, but the previous
discrepancies went beyond that.
A new global (namespace-spanning) list of link maps,
_dl_init_called_list, is updated right before ELF constructors are
called from _dl_init.
In dl_close_worker, the maps variable, an on-stack variable length
array, is eliminated. (VLAs are problematic, and dlclose should not
call malloc because it cannot readily deal with malloc failure.)
Marking still-used objects uses the namespace list directly, with
next and next_idx replacing the done_index variable.
After marking, _dl_init_called_list is used to call the destructors
of now-unused maps in reverse destructor order. These destructors
can call dlopen. Previously, new objects did not have l_map_used set.
This had to change: There is no copy of the link map list anymore,
so processing would cover newly opened (and unmarked) mappings,
unloading them. Now, _dl_init (indirectly) sets l_map_used, too.
(dlclose is handled by the existing reentrancy guard.)
After _dl_init_called_list traversal, two more loops follow. The
processing order changes to the original link map order in the
namespace. Previously, dependency order was used. The difference
should not matter because relocation dependencies could already
reorder link maps in the old code.
The changes to _dl_fini remove the sorting step and replace it with
a traversal of _dl_init_called_list. The l_direct_opencount
decrement outside the loader lock is removed because it appears
incorrect: the counter manipulation could race with other dynamic
loader operations.
tst-audit23 needs adjustments for the changes in LA_ACT_DELETE
notifications. The new approach for checking la_activity should
make it clearer that la_activity calls come in pairs around namespace
updates.
The dependency sorting test cases need updates because the destructor
order is now always the reverse of the constructor order, even with
relocation dependencies or cycles present.
There is a future cleanup opportunity to remove the now-constant
force_first and for_fini arguments from the _dl_sort_maps function.
Fixes commit 1df71d32fe5f5905ffd5d100e5e9ca8ad62 ("elf: Implement
force_first handling in _dl_sort_maps_dfs (bug 28937)").
Reviewed-by: DJ Delorie <dj@redhat.com>
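The user-visible contract after this change fits in a few lines of application code; liba.so and libb.so below are hypothetical DSOs whose constructors and destructors print their names:

/* order-demo.c -- build with: gcc order-demo.c -o order-demo -ldl  */
#define _GNU_SOURCE
#include <dlfcn.h>

int
main (void)
{
  void *a = dlopen ("liba.so", RTLD_NOW);  /* ctor A runs */
  void *b = dlopen ("libb.so", RTLD_NOW);  /* ctor B runs */
  /* Handles deliberately left open: at process exit, _dl_fini now
     walks _dl_init_called_list and runs dtor B, then dtor A --
     strictly the reverse of constructor order, even where
     relocation dependencies would previously have made
     _dl_sort_maps produce a different order.  */
  (void) a;
  (void) b;
  return 0;
}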
diff --git a/elf/dl-close.c b/elf/dl-close.c
index 26ea51dfbadc5b85..6b134f66628cfc03 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -137,30 +137,31 @@ _dl_close_worker (struct link_map *map, bool force)
bool any_tls = false;
const unsigned int nloaded = ns->_ns_nloaded;
- struct link_map *maps[nloaded];
- /* Run over the list and assign indexes to the link maps and enter
- them into the MAPS array. */
+ /* Run over the list and assign indexes to the link maps. */
int idx = 0;
for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next)
{
l->l_map_used = 0;
l->l_map_done = 0;
l->l_idx = idx;
- maps[idx] = l;
++idx;
}
assert (idx == nloaded);
- /* Keep track of the lowest index link map we have covered already. */
- int done_index = -1;
- while (++done_index < nloaded)
+ /* Keep marking link maps until no new link maps are found. */
+ for (struct link_map *l = ns->_ns_loaded; l != NULL; )
{
- struct link_map *l = maps[done_index];
+ /* next is reset to earlier link maps for remarking. */
+ struct link_map *next = l->l_next;
+ int next_idx = l->l_idx + 1; /* next->l_idx, but covers next == NULL. */
if (l->l_map_done)
- /* Already handled. */
- continue;
+ {
+ /* Already handled. */
+ l = next;
+ continue;
+ }
/* Check whether this object is still used. */
if (l->l_type == lt_loaded
@@ -170,7 +171,10 @@ _dl_close_worker (struct link_map *map, bool force)
acquire is sufficient and correct. */
&& atomic_load_acquire (&l->l_tls_dtor_count) == 0
&& !l->l_map_used)
- continue;
+ {
+ l = next;
+ continue;
+ }
/* We need this object and we handle it now. */
l->l_map_used = 1;
@@ -197,8 +201,11 @@ _dl_close_worker (struct link_map *map, bool force)
already processed it, then we need to go back
and process again from that point forward to
ensure we keep all of its dependencies also. */
- if ((*lp)->l_idx - 1 < done_index)
- done_index = (*lp)->l_idx - 1;
+ if ((*lp)->l_idx < next_idx)
+ {
+ next = *lp;
+ next_idx = next->l_idx;
+ }
}
}
@@ -218,44 +225,65 @@ _dl_close_worker (struct link_map *map, bool force)
if (!jmap->l_map_used)
{
jmap->l_map_used = 1;
- if (jmap->l_idx - 1 < done_index)
- done_index = jmap->l_idx - 1;
+ if (jmap->l_idx < next_idx)
+ {
+ next = jmap;
+ next_idx = next->l_idx;
+ }
}
}
}
- }
- /* Sort the entries. We can skip looking for the binary itself which is
- at the front of the search list for the main namespace. */
- _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true);
+ l = next;
+ }
- /* Call all termination functions at once. */
- bool unload_any = false;
- bool scope_mem_left = false;
- unsigned int unload_global = 0;
- unsigned int first_loaded = ~0;
- for (unsigned int i = 0; i < nloaded; ++i)
+ /* Call the destructors in reverse constructor order, and remove the
+ closed link maps from the list. */
+ for (struct link_map **init_called_head = &_dl_init_called_list;
+ *init_called_head != NULL; )
{
- struct link_map *imap = maps[i];
+ struct link_map *imap = *init_called_head;
- /* All elements must be in the same namespace. */
- assert (imap->l_ns == nsid);
-
- if (!imap->l_map_used)
+ /* _dl_init_called_list is global, to produce a global odering.
+ Ignore the other namespaces (and link maps that are still used). */
+ if (imap->l_ns != nsid || imap->l_map_used)
+ init_called_head = &imap->l_init_called_next;
+ else
{
assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);
- /* Call its termination function. Do not do it for
- half-cooked objects. Temporarily disable exception
- handling, so that errors are fatal. */
- if (imap->l_init_called)
+ /* _dl_init_called_list is updated at the same time as
+ l_init_called. */
+ assert (imap->l_init_called);
+
+ if (imap->l_info[DT_FINI_ARRAY] != NULL
+ || imap->l_info[DT_FINI] != NULL)
_dl_catch_exception (NULL, _dl_call_fini, imap);
#ifdef SHARED
/* Auditing checkpoint: we remove an object. */
_dl_audit_objclose (imap);
#endif
+ /* Unlink this link map. */
+ *init_called_head = imap->l_init_called_next;
+ }
+ }
+
+
+ bool unload_any = false;
+ bool scope_mem_left = false;
+ unsigned int unload_global = 0;
+
+ /* For skipping un-unloadable link maps in the second loop. */
+ struct link_map *first_loaded = ns->_ns_loaded;
+ /* Iterate over the namespace to find objects to unload. Some
+ unloadable objects may not be on _dl_init_called_list due to
+ dlopen failure. */
+ for (struct link_map *imap = first_loaded; imap != NULL; imap = imap->l_next)
+ {
+ if (!imap->l_map_used)
+ {
/* This object must not be used anymore. */
imap->l_removed = 1;
@@ -266,8 +294,8 @@ _dl_close_worker (struct link_map *map, bool force)
++unload_global;
/* Remember where the first dynamically loaded object is. */
- if (i < first_loaded)
- first_loaded = i;
+ if (first_loaded == NULL)
+ first_loaded = imap;
}
/* Else imap->l_map_used. */
else if (imap->l_type == lt_loaded)
@@ -403,8 +431,8 @@ _dl_close_worker (struct link_map *map, bool force)
imap->l_loader = NULL;
/* Remember where the first dynamically loaded object is. */
- if (i < first_loaded)
- first_loaded = i;
+ if (first_loaded == NULL)
+ first_loaded = imap;
}
}
@@ -475,10 +503,11 @@ _dl_close_worker (struct link_map *map, bool force)
/* Check each element of the search list to see if all references to
it are gone. */
- for (unsigned int i = first_loaded; i < nloaded; ++i)
+ for (struct link_map *imap = first_loaded; imap != NULL; )
{
- struct link_map *imap = maps[i];
- if (!imap->l_map_used)
+ if (imap->l_map_used)
+ imap = imap->l_next;
+ else
{
assert (imap->l_type == lt_loaded);
@@ -686,7 +715,9 @@ _dl_close_worker (struct link_map *map, bool force)
if (imap == GL(dl_initfirst))
GL(dl_initfirst) = NULL;
+ struct link_map *next = imap->l_next;
free (imap);
+ imap = next;
}
}
diff --git a/elf/dl-fini.c b/elf/dl-fini.c
index 2d34658d4c3a470c..25a7767d707721d5 100644
--- a/elf/dl-fini.c
+++ b/elf/dl-fini.c
@@ -23,116 +23,68 @@
void
_dl_fini (void)
{
- /* Lots of fun ahead. We have to call the destructors for all still
- loaded objects, in all namespaces. The problem is that the ELF
- specification now demands that dependencies between the modules
- are taken into account. I.e., the destructor for a module is
- called before the ones for any of its dependencies.
-
- To make things more complicated, we cannot simply use the reverse
- order of the constructors. Since the user might have loaded objects
- using `dlopen' there are possibly several other modules with its
- dependencies to be taken into account. Therefore we have to start
- determining the order of the modules once again from the beginning. */
-
- /* We run the destructors of the main namespaces last. As for the
- other namespaces, we pick run the destructors in them in reverse
- order of the namespace ID. */
-#ifdef SHARED
- int do_audit = 0;
- again:
-#endif
- for (Lmid_t ns = GL(dl_nns) - 1; ns >= 0; --ns)
- {
- /* Protect against concurrent loads and unloads. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
-
- unsigned int nloaded = GL(dl_ns)[ns]._ns_nloaded;
- /* No need to do anything for empty namespaces or those used for
- auditing DSOs. */
- if (nloaded == 0
-#ifdef SHARED
- || GL(dl_ns)[ns]._ns_loaded->l_auditing != do_audit
-#endif
- )
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
- else
- {
+ /* Call destructors strictly in the reverse order of constructors.
+ This causes fewer surprises than some arbitrary reordering based
+ on new (relocation) dependencies. None of the objects are
+ unmapped, so applications can deal with this if their DSOs remain
+ in a consistent state after destructors have run. */
+
+ /* Protect against concurrent loads and unloads. */
+ __rtld_lock_lock_recursive (GL(dl_load_lock));
+
+ /* Ignore objects which are opened during shutdown. */
+ struct link_map *local_init_called_list = _dl_init_called_list;
+
+ for (struct link_map *l = local_init_called_list; l != NULL;
+ l = l->l_init_called_next)
+ /* Bump l_direct_opencount of all objects so that they
+ are not dlclose()ed from underneath us. */
+ ++l->l_direct_opencount;
+
+ /* After this point, everything linked from local_init_called_list
+ cannot be unloaded because of the reference counter update. */
+ __rtld_lock_unlock_recursive (GL(dl_load_lock));
+
+ /* Perform two passes: One for non-audit modules, one for audit
+ modules. This way, audit modules receive unload notifications
+ for non-audit objects, and the destructors for audit modules
+ still run. */
#ifdef SHARED
- _dl_audit_activity_nsid (ns, LA_ACT_DELETE);
+ int last_pass = GLRO(dl_naudit) > 0;
+ Lmid_t last_ns = -1;
+ for (int do_audit = 0; do_audit <= last_pass; ++do_audit)
#endif
-
- /* Now we can allocate an array to hold all the pointers and
- copy the pointers in. */
- struct link_map *maps[nloaded];
-
- unsigned int i;
- struct link_map *l;
- assert (nloaded != 0 || GL(dl_ns)[ns]._ns_loaded == NULL);
- for (l = GL(dl_ns)[ns]._ns_loaded, i = 0; l != NULL; l = l->l_next)
- /* Do not handle ld.so in secondary namespaces. */
- if (l == l->l_real)
- {
- assert (i < nloaded);
-
- maps[i] = l;
- l->l_idx = i;
- ++i;
-
- /* Bump l_direct_opencount of all objects so that they
- are not dlclose()ed from underneath us. */
- ++l->l_direct_opencount;
- }
- assert (ns != LM_ID_BASE || i == nloaded);
- assert (ns == LM_ID_BASE || i == nloaded || i == nloaded - 1);
- unsigned int nmaps = i;
-
- /* Now we have to do the sorting. We can skip looking for the
- binary itself which is at the front of the search list for
- the main namespace. */
- _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true);
-
- /* We do not rely on the linked list of loaded object anymore
- from this point on. We have our own list here (maps). The
- various members of this list cannot vanish since the open
- count is too high and will be decremented in this loop. So
- we release the lock so that some code which might be called
- from a destructor can directly or indirectly access the
- lock. */
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
-
- /* 'maps' now contains the objects in the right order. Now
- call the destructors. We have to process this array from
- the front. */
- for (i = 0; i < nmaps; ++i)
- {
- struct link_map *l = maps[i];
-
- if (l->l_init_called)
- {
- _dl_call_fini (l);
+ for (struct link_map *l = local_init_called_list; l != NULL;
+ l = l->l_init_called_next)
+ {
#ifdef SHARED
- /* Auditing checkpoint: another object closed. */
- _dl_audit_objclose (l);
+ if (GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing != do_audit)
+ continue;
+
+ /* Avoid back-to-back calls of _dl_audit_activity_nsid for the
+ same namespace. */
+ if (last_ns != l->l_ns)
+ {
+ if (last_ns >= 0)
+ _dl_audit_activity_nsid (last_ns, LA_ACT_CONSISTENT);
+ _dl_audit_activity_nsid (l->l_ns, LA_ACT_DELETE);
+ last_ns = l->l_ns;
+ }
#endif
- }
- /* Correct the previous increment. */
- --l->l_direct_opencount;
- }
+ /* There is no need to re-enable exceptions because _dl_fini
+ is not called from a context where exceptions are caught. */
+ _dl_call_fini (l);
#ifdef SHARED
- _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT);
+ /* Auditing checkpoint: another object closed. */
+ _dl_audit_objclose (l);
#endif
- }
- }
+ }
#ifdef SHARED
- if (! do_audit && GLRO(dl_naudit) > 0)
- {
- do_audit = 1;
- goto again;
- }
+ if (last_ns >= 0)
+ _dl_audit_activity_nsid (last_ns, LA_ACT_CONSISTENT);
if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS))
_dl_debug_printf ("\nruntime linker statistics:\n"
diff --git a/elf/dl-init.c b/elf/dl-init.c
index 73c0259fbe6d19af..eb6c83d180c3f1a1 100644
--- a/elf/dl-init.c
+++ b/elf/dl-init.c
@@ -24,6 +24,7 @@
/* Type of the initializer. */
typedef void (*init_t) (int, char **, char **);
+struct link_map *_dl_init_called_list;
static void
call_init (struct link_map *l, int argc, char **argv, char **env)
@@ -45,6 +46,21 @@ call_init (struct link_map *l, int argc, char **argv, char **env)
dependency. */
l->l_init_called = 1;
+ /* Help an already-running dlclose: The just-loaded object must not
+ be removed during the current pass. (No effect if no dlclose in
+ progress.) */
+ l->l_map_used = 1;
+
+ /* Record execution before starting any initializers. This way, if
+ the initializers themselves call dlopen, their ELF destructors
+ will eventually be run before this object is destructed, matching
+ that their ELF constructors have run before this object was
+ constructed. _dl_fini uses this list for audit callbacks, so
+ register objects on the list even if they do not have a
+ constructor. */
+ l->l_init_called_next = _dl_init_called_list;
+ _dl_init_called_list = l;
+
/* Check for object which constructors we do not run here. */
if (__builtin_expect (l->l_name[0], 'a') == '\0'
&& l->l_type == lt_executable)
diff --git a/elf/dso-sort-tests-1.def b/elf/dso-sort-tests-1.def
index 4bf9052db16fb352..61dc54f8ae06d465 100644
--- a/elf/dso-sort-tests-1.def
+++ b/elf/dso-sort-tests-1.def
@@ -53,21 +53,14 @@ tst-dso-ordering10: {}->a->b->c;soname({})=c
output: b>a>{}<a<b
# Complex example from Bugzilla #15311, under-linked and with circular
-# relocation(dynamic) dependencies. While this is technically unspecified, the
-# presumed reasonable practical behavior is for the destructor order to respect
-# the static DT_NEEDED links (here this means the a->b->c->d order).
-# The older dynamic_sort=1 algorithm does not achieve this, while the DFS-based
-# dynamic_sort=2 algorithm does, although it is still arguable whether going
-# beyond spec to do this is the right thing to do.
-# The below expected outputs are what the two algorithms currently produce
-# respectively, for regression testing purposes.
+# relocation(dynamic) dependencies. For both sorting algorithms, the
+# destruction order is the reverse of the construction order, and
+# relocation dependencies are not taken into account.
tst-bz15311: {+a;+e;+f;+g;+d;%d;-d;-g;-f;-e;-a};a->b->c->d;d=>[ba];c=>a;b=>e=>a;c=>f=>b;d=>g=>c
-output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<a<c<d<g<f<b<e];}
-output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<g<f<a<b<c<d<e];}
+output: {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[<g<f<e<a<b<c<d];}
# Test that even in the presence of dependency loops involving dlopen'ed
# object, that object is initialized last (and not unloaded prematurely).
-# Final destructor order is indeterminate due to the cycle.
+# Final destructor order is the opposite of constructor order.
tst-bz28937: {+a;+b;-b;+c;%c};a->a1;a->a2;a2->a;b->b1;c->a1;c=>a1
-output(glibc.rtld.dynamic_sort=1): {+a[a2>a1>a>];+b[b1>b>];-b[<b<b1];+c[c>];%c(a1());}<a<a2<c<a1
-output(glibc.rtld.dynamic_sort=2): {+a[a2>a1>a>];+b[b1>b>];-b[<b<b1];+c[c>];%c(a1());}<a2<a<c<a1
+output: {+a[a2>a1>a>];+b[b1>b>];-b[<b<b1];+c[c>];%c(a1());}<c<a<a1<a2
diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c
index 4904cf1340a97ee1..f40760bd702e87c1 100644
--- a/elf/tst-audit23.c
+++ b/elf/tst-audit23.c
@@ -98,6 +98,8 @@ do_test (int argc, char *argv[])
char *lname;
uintptr_t laddr;
Lmid_t lmid;
+ uintptr_t cookie;
+ uintptr_t namespace;
bool closed;
} objs[max_objs] = { [0 ... max_objs-1] = { .closed = false } };
size_t nobjs = 0;
@@ -117,6 +119,9 @@ do_test (int argc, char *argv[])
size_t buffer_length = 0;
while (xgetline (&buffer, &buffer_length, out))
{
+ *strchrnul (buffer, '\n') = '\0';
+ printf ("info: subprocess output: %s\n", buffer);
+
if (startswith (buffer, "la_activity: "))
{
uintptr_t cookie;
@@ -125,29 +130,26 @@ do_test (int argc, char *argv[])
&cookie);
TEST_COMPARE (r, 2);
- /* The cookie identifies the object at the head of the link map,
- so we only add a new namespace if it changes from the previous
- one. This works since dlmopen is the last in the test body. */
- if (cookie != last_act_cookie && last_act_cookie != -1)
- TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
-
if (this_act == LA_ACT_ADD && acts[nacts] != cookie)
{
+ /* The cookie identifies the object at the head of the
+ link map, so we only add a new namespace if it
+ changes from the previous one. This works since
+ dlmopen is the last in the test body. */
+ if (cookie != last_act_cookie && last_act_cookie != -1)
+ TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
+
acts[nacts++] = cookie;
last_act_cookie = cookie;
}
- /* The LA_ACT_DELETE is called in the reverse order of LA_ACT_ADD
- at program termination (if the tests adds a dlclose or a library
- with extra dependencies this will need to be adapted). */
+ /* LA_ACT_DELETE is called multiple times for each
+ namespace, depending on destruction order. */
else if (this_act == LA_ACT_DELETE)
- {
- last_act_cookie = acts[--nacts];
- TEST_COMPARE (acts[nacts], cookie);
- acts[nacts] = 0;
- }
+ last_act_cookie = cookie;
else if (this_act == LA_ACT_CONSISTENT)
{
TEST_COMPARE (cookie, last_act_cookie);
+ last_act_cookie = -1;
/* LA_ACT_DELETE must always be followed by an la_objclose. */
if (last_act == LA_ACT_DELETE)
@@ -179,6 +181,8 @@ do_test (int argc, char *argv[])
objs[nobjs].lname = lname;
objs[nobjs].laddr = laddr;
objs[nobjs].lmid = lmid;
+ objs[nobjs].cookie = cookie;
+ objs[nobjs].namespace = last_act_cookie;
objs[nobjs].closed = false;
nobjs++;
@@ -201,6 +205,12 @@ do_test (int argc, char *argv[])
if (strcmp (lname, objs[i].lname) == 0 && lmid == objs[i].lmid)
{
TEST_COMPARE (objs[i].closed, false);
+ TEST_COMPARE (objs[i].cookie, cookie);
+ if (objs[i].namespace == -1)
+ /* No LA_ACT_ADD before the first la_objopen call. */
+ TEST_COMPARE (acts[0], last_act_cookie);
+ else
+ TEST_COMPARE (objs[i].namespace, last_act_cookie);
objs[i].closed = true;
break;
}
@@ -209,11 +219,7 @@ do_test (int argc, char *argv[])
/* la_objclose should be called after la_activity(LA_ACT_DELETE) for
the closed object's namespace. */
TEST_COMPARE (last_act, LA_ACT_DELETE);
- if (!seen_first_objclose)
- {
- TEST_COMPARE (last_act_cookie, cookie);
- seen_first_objclose = true;
- }
+ seen_first_objclose = true;
}
}
diff --git a/include/link.h b/include/link.h
index 041ff5f753a9ee11..a37b2cf3133eefd5 100644
--- a/include/link.h
+++ b/include/link.h
@@ -276,6 +276,10 @@ struct link_map
/* List of object in order of the init and fini calls. */
struct link_map **l_initfini;
+ /* Linked list of objects in reverse ELF constructor execution
+ order. Head of list is stored in _dl_init_called_list. */
+ struct link_map *l_init_called_next;
+
/* List of the dependencies introduced through symbol binding. */
struct link_map_reldeps
{
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 0bad34d44a5685d9..906447e677a4ca40 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1046,6 +1046,10 @@ extern int _dl_check_map_versions (struct link_map *map, int verbose,
extern void _dl_init (struct link_map *main_map, int argc, char **argv,
char **env) attribute_hidden;
+/* List of ELF objects in reverse order of their constructor
+ invocation. */
+extern struct link_map *_dl_init_called_list attribute_hidden;
+
/* Call the finalizer functions of all shared objects whose
initializer functions have completed. */
extern void _dl_fini (void) attribute_hidden;

View File

@@ -0,0 +1,141 @@
commit 53df2ce6885da3d0e89e87dca7b095622296014f
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Sep 8 13:02:06 2023 +0200
elf: Remove unused l_text_end field from struct link_map
It is a left-over from commit 52a01100ad011293197637e42b5be1a479a2
("elf: Remove ad-hoc restrictions on dlopen callers [BZ #22787]").
When backporting commit 6985865bc3ad5b23147ee73466583dd7fdf65892
("elf: Always call destructors in reverse constructor order
(bug 30785)"), we can move the l_init_called_next field to this
place, so that the internal GLIBC_PRIVATE ABI does not change.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
Conflicts:
elf/dl-load.h
(Missing commit "Avoid "inline" after return type in function
definitions.")
elf/rtld.c
(Missing rtld_setup_main_map function. Re-did the l_text_end
removal from scratch.)
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 0b45e6e3db31c70d..52dc564af9e95878 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1176,7 +1176,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
/* Now process the load commands and map segments into memory.
This is responsible for filling in:
- l_map_start, l_map_end, l_addr, l_contiguous, l_text_end, l_phdr
+ l_map_start, l_map_end, l_addr, l_contiguous, l_phdr
*/
errstring = _dl_map_segments (l, fd, header, type, loadcmds, nloadcmds,
maplength, has_holes, loader);
diff --git a/elf/dl-load.h b/elf/dl-load.h
index 66ea2e9237ab6321..ebf2604e044c3bde 100644
--- a/elf/dl-load.h
+++ b/elf/dl-load.h
@@ -82,14 +82,11 @@ struct loadcmd
/* This is a subroutine of _dl_map_segments. It should be called for each
load command, some time after L->l_addr has been set correctly. It is
- responsible for setting up the l_text_end and l_phdr fields. */
-static void __always_inline
+ responsible for setting the l_phdr fields */
+static __always_inline void
_dl_postprocess_loadcmd (struct link_map *l, const ElfW(Ehdr) *header,
const struct loadcmd *c)
{
- if (c->prot & PROT_EXEC)
- l->l_text_end = l->l_addr + c->mapend;
-
if (l->l_phdr == 0
&& c->mapoff <= header->e_phoff
&& ((size_t) (c->mapend - c->mapstart + c->mapoff)
@@ -102,7 +99,7 @@ _dl_postprocess_loadcmd (struct link_map *l, const ElfW(Ehdr) *header,
/* This is a subroutine of _dl_map_object_from_fd. It is responsible
for filling in several fields in *L: l_map_start, l_map_end, l_addr,
- l_contiguous, l_text_end, l_phdr. On successful return, all the
+ l_contiguous, l_phdr. On successful return, all the
segments are mapped (or copied, or whatever) from the file into their
final places in the address space, with the correct page permissions,
and any bss-like regions already zeroed. It returns a null pointer
diff --git a/elf/rtld.c b/elf/rtld.c
index cd2cc4024a3581c2..c2edd0bfdc27f207 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -472,7 +472,6 @@ _dl_start_final (void *arg, struct dl_start_final_info *info)
GL(dl_rtld_map).l_real = &GL(dl_rtld_map);
GL(dl_rtld_map).l_map_start = (ElfW(Addr)) _begin;
GL(dl_rtld_map).l_map_end = (ElfW(Addr)) _end;
- GL(dl_rtld_map).l_text_end = (ElfW(Addr)) _etext;
/* Copy the TLS related data if necessary. */
#ifndef DONT_USE_BOOTSTRAP_MAP
# if NO_TLS_OFFSET != 0
@@ -1520,7 +1519,6 @@ dl_main (const ElfW(Phdr) *phdr,
}
main_map->l_map_end = 0;
- main_map->l_text_end = 0;
/* Perhaps the executable has no PT_LOAD header entries at all. */
main_map->l_map_start = ~0;
/* And it was opened directly. */
@@ -1591,8 +1589,6 @@ dl_main (const ElfW(Phdr) *phdr,
allocend = main_map->l_addr + ph->p_vaddr + ph->p_memsz;
if (main_map->l_map_end < allocend)
main_map->l_map_end = allocend;
- if ((ph->p_flags & PF_X) && allocend > main_map->l_text_end)
- main_map->l_text_end = allocend;
}
break;
@@ -1641,8 +1637,6 @@ ERROR: '%s': cannot process note segment.\n", _dl_argv[0]);
= (char *) main_map->l_tls_initimage + main_map->l_addr;
if (! main_map->l_map_end)
main_map->l_map_end = ~0;
- if (! main_map->l_text_end)
- main_map->l_text_end = ~0;
if (! GL(dl_rtld_map).l_libname && GL(dl_rtld_map).l_name)
{
/* We were invoked directly, so the program might not have a
diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h
index d2b35a080b57c183..352e992c578e416f 100644
--- a/elf/setup-vdso.h
+++ b/elf/setup-vdso.h
@@ -52,9 +52,6 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)),
l->l_addr = ph->p_vaddr;
if (ph->p_vaddr + ph->p_memsz >= l->l_map_end)
l->l_map_end = ph->p_vaddr + ph->p_memsz;
- if ((ph->p_flags & PF_X)
- && ph->p_vaddr + ph->p_memsz >= l->l_text_end)
- l->l_text_end = ph->p_vaddr + ph->p_memsz;
}
else
/* There must be no TLS segment. */
@@ -63,7 +60,6 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)),
l->l_map_start = (ElfW(Addr)) GLRO(dl_sysinfo_dso);
l->l_addr = l->l_map_start - l->l_addr;
l->l_map_end += l->l_addr;
- l->l_text_end += l->l_addr;
l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr);
elf_get_dynamic_info (l, dyn_temp);
_dl_setup_hash (l);
diff --git a/include/link.h b/include/link.h
index a37b2cf3133eefd5..88683e7a747b86e0 100644
--- a/include/link.h
+++ b/include/link.h
@@ -251,8 +251,6 @@ struct link_map
/* Start and finish of memory map for this object. l_map_start
need not be the same as l_addr. */
ElfW(Addr) l_map_start, l_map_end;
- /* End of the executable part of the mapping. */
- ElfW(Addr) l_text_end;
/* Default array for 'l_scope'. */
struct r_scope_elem *l_scope_mem[4];

View File

@@ -0,0 +1,35 @@
commit d3ba6c1333b10680ce5900a628108507d9d4b844
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Sep 11 09:17:52 2023 +0200
elf: Move l_init_called_next to old place of l_text_end in link map
This preserves all member offsets and the GLIBC_PRIVATE ABI
for backporting.
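Offset-preserving member moves like this one can be checked at compile time by comparing the old and new layouts; a hypothetical sketch of the technique (not part of the patch, simplified to two stand-in structs):

/* abi-offsets.c -- layout check sketch; ElfW(Addr) and pointers
   have the same size on the relevant targets.  */
#include <stddef.h>

/* Old layout: l_text_end occupied one address-sized slot.  */
struct before { unsigned long l_map_start, l_map_end, l_text_end;
                void *l_scope_mem[4]; };
/* New layout: l_init_called_next reuses that slot.  */
struct after { unsigned long l_map_start, l_map_end;
               void *l_init_called_next;
               void *l_scope_mem[4]; };

/* The move is ABI-neutral if every subsequent member keeps its
   offset.  */
_Static_assert (offsetof (struct before, l_scope_mem)
                == offsetof (struct after, l_scope_mem),
                "member offsets preserved");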
diff --git a/include/link.h b/include/link.h
index 88683e7a747b86e0..a464dd8e86cf89d0 100644
--- a/include/link.h
+++ b/include/link.h
@@ -252,6 +252,10 @@ struct link_map
need not be the same as l_addr. */
ElfW(Addr) l_map_start, l_map_end;
+ /* Linked list of objects in reverse ELF constructor execution
+ order. Head of list is stored in _dl_init_called_list. */
+ struct link_map *l_init_called_next;
+
/* Default array for 'l_scope'. */
struct r_scope_elem *l_scope_mem[4];
/* Size of array allocated for 'l_scope'. */
@@ -274,10 +278,6 @@ struct link_map
/* List of object in order of the init and fini calls. */
struct link_map **l_initfini;
- /* Linked list of objects in reverse ELF constructor execution
- order. Head of list is stored in _dl_init_called_list. */
- struct link_map *l_init_called_next;
-
/* List of the dependencies introduced through symbol binding. */
struct link_map_reldeps
{

View File

@@ -1,6 +1,6 @@
%define glibcsrcdir glibc-2.28
%define glibcversion 2.28
-%define glibcrelease 225%{?dist}
+%define glibcrelease 236%{?dist}
# Pre-release tarballs are pulled in from git using a command that is
# effectively:
#
@@ -132,7 +132,7 @@ end \
Summary: The GNU libc libraries
Name: glibc
Version: %{glibcversion}
-Release: %{glibcrelease}
+Release: %{glibcrelease}.1
# In general, GPLv2+ is used by programs, LGPLv2+ is used for
# libraries.
@@ -1031,6 +1031,28 @@ Patch838: glibc-rh2142937-3.patch
Patch839: glibc-rh2144568.patch
Patch840: glibc-rh2154914-1.patch
Patch841: glibc-rh2154914-2.patch
+Patch842: glibc-rh2183081-1.patch
+Patch843: glibc-rh2183081-2.patch
+Patch844: glibc-rh2172949.patch
+Patch845: glibc-rh2180155-1.patch
+Patch846: glibc-rh2180155-2.patch
+Patch847: glibc-rh2180155-3.patch
+Patch848: glibc-rh2213909.patch
+Patch849: glibc-rh2176707-1.patch
+Patch850: glibc-rh2176707-2.patch
+Patch851: glibc-rh2186781.patch
+Patch852: glibc-rh2224348.patch
+Patch853: glibc-rh2176707-3.patch
+Patch854: glibc-rh2180462-1.patch
+Patch855: glibc-rh2180462-2.patch
+Patch856: glibc-rh2180462-3.patch
+Patch857: glibc-rh2180462-4.patch
+Patch858: glibc-rh2233338-1.patch
+Patch859: glibc-rh2233338-2.patch
+Patch860: glibc-rh2233338-3.patch
+Patch861: glibc-rh2233338-4.patch
+Patch862: glibc-rh2233338-5.patch
+Patch863: glibc-rh2233338-6.patch
##############################################################################
# Continued list of core "glibc" package information:
@@ -2778,7 +2800,7 @@ fi
%ifarch s390x
/lib/ld64.so.1
%endif
-%verify(not md5 size mtime) %config(noreplace) /etc/nsswitch.conf
+%verify(not md5 size mtime link) %config(noreplace) /etc/nsswitch.conf
%verify(not md5 size mtime) %config(noreplace) /etc/ld.so.conf
%verify(not md5 size mtime) %config(noreplace) /etc/rpc
%dir /etc/ld.so.conf.d
@@ -2861,6 +2883,42 @@ fi
%files -f compat-libpthread-nonshared.filelist -n compat-libpthread-nonshared
%changelog
+* Mon Sep 11 2023 Florian Weimer <fweimer@redhat.com> - 2.28-236.1
+- Always call destructors in reverse constructor order (#2233338)
+* Tue Aug 15 2023 Carlos O'Donell <carlos@redhat.com> - 2.28-236
+- Fix string and memory function tuning on small systems (#2180462)
+* Tue Aug 8 2023 DJ Delorie <dj@redhat.com> - 2.28-235
+- Fix temporal threshold calculations (#2180462)
+* Mon Aug 7 2023 Florian Weimer <fweimer@redhat.com> - 2.28-234
+- Ignore symbolic link change on /etc/nsswitch.conf (#2229709)
+* Wed Jul 26 2023 DJ Delorie <dj@redhat.com> - 2.28-233
+- Update test to match upstream more closely (#2176707)
+* Fri Jul 21 2023 Florian Weimer <fweimer@redhat.com> - 2.28-232
+- Make libSegFault.so NODELETE (#2224348)
+* Sun Jul 9 2023 Carlos O'Donell <carlos@redhat.com> - 2.28-231
+- Update ESTALE error message translations (#2186781)
+* Fri Jul 7 2023 DJ Delorie <dj@redhat.com> - 2.28-230
+- Don't block SIGCHLD when system() is called concurrently (#2176707)
+* Mon Jul 3 2023 DJ Delorie <dj@redhat.com> - 2.28-229
+- resolv_conf: release lock on allocation failure (#2213909)
+* Mon May 22 2023 Florian Weimer <fweimer@redhat.com> - 2.28-228
+- gmon: Various bug fixes (#2180155)
+* Thu May 18 2023 Patsy Griffin <patsy@redhat.com> - 2.28-227
+- Change sgetsgent_r to set errno (#2172949)
+* Wed May 3 2023 Florian Weimer <fweimer@redhat.com> - 2.28-226
+- Fix incorrect inline feraiseexcept on i686, x86-64 (#2183081)
* Fri Jan 20 2023 Florian Weimer <fweimer@redhat.com> - 2.28-225
- Enforce a specific internal ordering for tunables (#2154914)