import UBI glibc-2.28-236.el8.7

This commit is contained in:
eabdullin 2023-11-14 19:00:37 +00:00
parent fc2ec9e560
commit 0058656e5f
22 changed files with 1679 additions and 32 deletions

View File

@ -0,0 +1,22 @@
Work around in the test case, the fact that RHEL-8 NSS modules
infrastructure incorrectly allows merging in the hosts database. This
is a RHEL-8 only fix.
diff --git a/nss/tst-nss-gai-actions.c b/nss/tst-nss-gai-actions.c
index efca6cd1837a172a..c35e752896eceb2a 100644
--- a/nss/tst-nss-gai-actions.c
+++ b/nss/tst-nss-gai-actions.c
@@ -87,6 +87,13 @@ do_one_test (int action, int family, bool canon)
case ACTION_MERGE:
if (ret == 0)
{
+ if (hints.ai_flags == 0 && hints.ai_family == AF_INET)
+ {
+ printf ("***** RHEL-8 limitation: "
+ "NSS modules infrastructure incorrectly allows MERGE\n");
+ return;
+ }
+
char *formatted = support_format_addrinfo (ai, ret);
printf ("merge unexpectedly succeeded:\n %s\n", formatted);

View File

@ -1,4 +1,4 @@
commit 1c37b8022e8763fedbb3f79c02e05c6acfe5a215
commit 228cdb00a045ae3b68a91b35c7548bab6029446e
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Thu Mar 17 11:44:34 2022 +0530
@ -22,6 +22,7 @@ Date: Thu Mar 17 11:44:34 2022 +0530
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Reviewed-by: DJ Delorie <dj@redhat.com>
(cherry picked from commit 1c37b8022e8763fedbb3f79c02e05c6acfe5a215)
Conflicts:
nss/Makefile
@ -29,8 +30,6 @@ Conflicts:
sysdeps/posix/getaddrinfo.c
(RES_USE_INET6 still present in RHEL-8 and NSS module traversal rewrite
not in RHEL-8)
nss/tst-nss-gai-actions.c
(Adapted SUCCESS=merge result to RHEL-8)
diff --git a/nss/Makefile b/nss/Makefile
index e8a7d9c7b3cefcdf..cfb255c6e7a3a4de 100644
@ -48,10 +47,10 @@ index e8a7d9c7b3cefcdf..cfb255c6e7a3a4de 100644
ifeq (yes,$(build-shared))
diff --git a/nss/tst-nss-gai-actions.c b/nss/tst-nss-gai-actions.c
new file mode 100644
index 0000000000000000..c35e752896eceb2a
index 0000000000000000..efca6cd1837a172a
--- /dev/null
+++ b/nss/tst-nss-gai-actions.c
@@ -0,0 +1,156 @@
@@ -0,0 +1,149 @@
+/* Test continue and merge NSS actions for getaddrinfo.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
@ -141,13 +140,6 @@ index 0000000000000000..c35e752896eceb2a
+ case ACTION_MERGE:
+ if (ret == 0)
+ {
+ if (hints.ai_flags == 0 && hints.ai_family == AF_INET)
+ {
+ printf ("***** RHEL-8 limitation: "
+ "NSS modules infrastructure incorrectly allows MERGE\n");
+ return;
+ }
+
+ char *formatted = support_format_addrinfo (ai, ret);
+
+ printf ("merge unexpectedly succeeded:\n %s\n", formatted);

View File

@ -0,0 +1,121 @@
commit 969e9733c7d17edf1e239a73fa172f357561f440
Author: Florian Weimer <fweimer@redhat.com>
Date: Tue Feb 21 09:20:28 2023 +0100
gshadow: Matching sgetsgent, sgetsgent_r ERANGE handling (bug 30151)
Before this change, sgetsgent_r did not set errno to ERANGE, but
sgetsgent only check errno, not the return value from sgetsgent_r.
Consequently, sgetsgent did not detect any error, and reported
success to the caller, without initializing the struct sgrp object
whose address was returned.
This commit changes sgetsgent_r to set errno as well. This avoids
similar issues in applications which only change errno.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/gshadow/Makefile b/gshadow/Makefile
index 796fbbf473..a95524593a 100644
--- a/gshadow/Makefile
+++ b/gshadow/Makefile
@@ -26,7 +26,7 @@ headers = gshadow.h
routines = getsgent getsgnam sgetsgent fgetsgent putsgent \
getsgent_r getsgnam_r sgetsgent_r fgetsgent_r
-tests = tst-gshadow tst-putsgent tst-fgetsgent_r
+tests = tst-gshadow tst-putsgent tst-fgetsgent_r tst-sgetsgent
CFLAGS-getsgent_r.c += -fexceptions
CFLAGS-getsgent.c += -fexceptions
diff --git a/gshadow/sgetsgent_r.c b/gshadow/sgetsgent_r.c
index ea085e91d7..c75624e1f7 100644
--- a/gshadow/sgetsgent_r.c
+++ b/gshadow/sgetsgent_r.c
@@ -61,7 +61,10 @@ __sgetsgent_r (const char *string, struct sgrp *resbuf, char *buffer,
buffer[buflen - 1] = '\0';
sp = strncpy (buffer, string, buflen);
if (buffer[buflen - 1] != '\0')
- return ERANGE;
+ {
+ __set_errno (ERANGE);
+ return ERANGE;
+ }
}
else
sp = (char *) string;
diff --git a/gshadow/tst-sgetsgent.c b/gshadow/tst-sgetsgent.c
new file mode 100644
index 0000000000..0370c10fd0
--- /dev/null
+++ b/gshadow/tst-sgetsgent.c
@@ -0,0 +1,69 @@
+/* Test large input for sgetsgent (bug 30151).
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gshadow.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xmemstream.h>
+#include <stdlib.h>
+
+static int
+do_test (void)
+{
+ /* Create a shadow group with 1000 members. */
+ struct xmemstream mem;
+ xopen_memstream (&mem);
+ const char *passwd = "k+zD0nucwfxAo3sw1NXUj6K5vt5M16+X0TVGdE1uFvq5R8V7efJ";
+ fprintf (mem.out, "group-name:%s::m0", passwd);
+ for (int i = 1; i < 1000; ++i)
+ fprintf (mem.out, ",m%d", i);
+ xfclose_memstream (&mem);
+
+ /* Call sgetsgent. */
+ char *input = mem.buffer;
+ struct sgrp *e = sgetsgent (input);
+ TEST_VERIFY_EXIT (e != NULL);
+ TEST_COMPARE_STRING (e->sg_namp, "group-name");
+ TEST_COMPARE_STRING (e->sg_passwd, passwd);
+ /* No administrators. */
+ TEST_COMPARE_STRING (e->sg_adm[0], NULL);
+ /* Check the members list. */
+ for (int i = 0; i < 1000; ++i)
+ {
+ char *member = xasprintf ("m%d", i);
+ TEST_COMPARE_STRING (e->sg_mem[i], member);
+ free (member);
+ }
+ TEST_COMPARE_STRING (e->sg_mem[1000], NULL);
+
+ /* Check that putsgent brings back the input string. */
+ xopen_memstream (&mem);
+ TEST_COMPARE (putsgent (e, mem.out), 0);
+ xfclose_memstream (&mem);
+ /* Compare without the trailing '\n' that putsgent added. */
+ TEST_COMPARE (mem.buffer[mem.length - 1], '\n');
+ mem.buffer[mem.length - 1] = '\0';
+ TEST_COMPARE_STRING (mem.buffer, input);
+
+ free (mem.buffer);
+ free (input);
+ return 0;
+}
+
+#include <support/test-driver.c>

View File

@ -0,0 +1,144 @@
From 436a604b7dc741fc76b5a6704c6cd8bb178518e7 Mon Sep 17 00:00:00 2001
From: Adam Yi <ayi@janestreet.com>
Date: Tue, 7 Mar 2023 07:30:02 -0500
Subject: posix: Fix system blocks SIGCHLD erroneously [BZ #30163]
Fix bug that SIGCHLD is erroneously blocked forever in the following
scenario:
1. Thread A calls system but hasn't returned yet
2. Thread B calls another system but returns
SIGCHLD would be blocked forever in thread B after its system() returns,
even after the system() in thread A returns.
Although POSIX does not require, glibc system implementation aims to be
thread and cancellation safe. This bug was introduced in
5fb7fc96350575c9adb1316833e48ca11553be49 when we moved reverting signal
mask to happen when the last concurrently running system returns,
despite that signal mask is per thread. This commit reverts this logic
and adds a test.
Signed-off-by: Adam Yi <ayi@janestreet.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
[DJ: Edited to use integer sleep() instead of nanosleep() dependency rabbit hole]
diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c
index 634acfe264..47a0afe6bf 100644
--- a/stdlib/tst-system.c
+++ b/stdlib/tst-system.c
@@ -25,6 +25,7 @@
#include <support/check.h>
#include <support/temp_file.h>
#include <support/support.h>
+#include <support/xthread.h>
#include <support/xunistd.h>
static char *tmpdir;
@@ -71,6 +72,20 @@ call_system (void *closure)
}
}
+static void *
+sleep_and_check_sigchld (void *closure)
+{
+ double *seconds = (double *) closure;
+ char cmd[namemax];
+ sprintf (cmd, "sleep %lf" , *seconds);
+ TEST_COMPARE (system (cmd), 0);
+
+ sigset_t blocked = {0};
+ TEST_COMPARE (sigprocmask (SIG_BLOCK, NULL, &blocked), 0);
+ TEST_COMPARE (sigismember (&blocked, SIGCHLD), 0);
+ return NULL;
+}
+
static int
do_test (void)
{
@@ -154,6 +169,17 @@ do_test (void)
xchmod (_PATH_BSHELL, st.st_mode);
}
+ {
+ pthread_t long_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 2 });
+ pthread_t short_sleep_thread = xpthread_create (NULL,
+ sleep_and_check_sigchld,
+ &(double) { 1 });
+ xpthread_join (short_sleep_thread);
+ xpthread_join (long_sleep_thread);
+ }
+
TEST_COMPARE (system (""), 0);
return 0;
diff --git a/support/shell-container.c b/support/shell-container.c
index ffa3378b5e..b1f9e793c1 100644
--- a/support/shell-container.c
+++ b/support/shell-container.c
@@ -169,6 +170,31 @@ kill_func (char **argv)
return 0;
}
+/* Emulate the "/bin/sleep" command. No suffix support. Options are
+ ignored. */
+static int
+sleep_func (char **argv)
+{
+ if (argv[0] == NULL)
+ {
+ fprintf (stderr, "sleep: missing operand\n");
+ return 1;
+ }
+ char *endptr = NULL;
+ long sec = strtol (argv[0], &endptr, 0);
+ if (endptr == argv[0] || errno == ERANGE || sec < 0)
+ {
+ fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
+ return 1;
+ }
+ if (sleep (sec) < 0)
+ {
+ fprintf (stderr, "sleep: failed to nanosleep\n");
+ return 1;
+ }
+ return 0;
+}
+
/* This is a list of all the built-in commands we understand. */
static struct {
const char *name;
@@ -179,6 +206,7 @@ static struct {
{ "cp", copy_func },
{ "exit", exit_func },
{ "kill", kill_func },
+ { "sleep", sleep_func },
{ NULL, NULL }
};
diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
index 2335a99184..d77720a625 100644
--- a/sysdeps/posix/system.c
+++ b/sysdeps/posix/system.c
@@ -179,16 +179,16 @@ do_system (const char *line)
as if the shell had terminated using _exit(127). */
status = W_EXITCODE (127, 0);
+ /* sigaction can not fail with SIGINT/SIGQUIT used with old
+ disposition. Same applies for sigprocmask. */
DO_LOCK ();
if (SUB_REF () == 0)
{
- /* sigaction can not fail with SIGINT/SIGQUIT used with old
- disposition. Same applies for sigprocmask. */
__sigaction (SIGINT, &intr, NULL);
__sigaction (SIGQUIT, &quit, NULL);
- __sigprocmask (SIG_SETMASK, &omask, NULL);
}
DO_UNLOCK ();
+ __sigprocmask (SIG_SETMASK, &omask, NULL);
if (ret != 0)
__set_errno (ret);

View File

@ -0,0 +1,27 @@
From d03094649d39949a30513bf3ffb03a28fecbccd8 Mon Sep 17 00:00:00 2001
From: Adam Yi <ayi@janestreet.com>
Date: Wed, 8 Mar 2023 03:11:47 -0500
Subject: hurd: fix build of tst-system.c
We made tst-system.c depend on pthread, but that requires linking with
$(shared-thread-library). It does not fail under Linux because the
variable expands to nothing under Linux, but it fails for Hurd.
I tested verified via cross-compiling that "make check" now works
for Hurd.
Signed-off-by: Adam Yi <ayi@janestreet.com>
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
[DJ: Edited for RHEL 8]
diff -rup a/stdlib/Makefile b/stdlib/Makefile
--- a/stdlib/Makefile 2023-07-07 00:44:55.810981644 -0400
+++ b/stdlib/Makefile 2023-07-07 00:46:47.541411091 -0400
@@ -102,6 +102,7 @@ LDLIBS-test-atexit-race = $(shared-threa
LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
LDLIBS-test-on_exit-race = $(shared-thread-library)
+LDLIBS-tst-system = $(shared-thread-library)
LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
LDFLAGS-test-dlclose-exit-race = $(LDFLAGS-rdynamic)

View File

@ -0,0 +1,42 @@
This patch is a RHEL-only patch which modifies the custom changes
in the previous patches in this series to make the test case look
more like the upstream test case.
diff -rup a/stdlib/tst-system.c b/stdlib/tst-system.c
--- a/stdlib/tst-system.c 2023-07-10 13:37:53.089505036 -0400
+++ b/stdlib/tst-system.c 2023-07-10 14:04:03.922610279 -0400
@@ -173,10 +173,10 @@ do_test (void)
{
pthread_t long_sleep_thread = xpthread_create (NULL,
sleep_and_check_sigchld,
- &(double) { 2 });
+ &(double) { 0.2 });
pthread_t short_sleep_thread = xpthread_create (NULL,
sleep_and_check_sigchld,
- &(double) { 1 });
+ &(double) { 0.1 });
xpthread_join (short_sleep_thread);
xpthread_join (long_sleep_thread);
}
diff -rup a/support/shell-container.c b/support/shell-container.c
--- a/support/shell-container.c 2023-07-10 13:37:53.089505036 -0400
+++ b/support/shell-container.c 2023-07-10 14:03:20.392920627 -0400
@@ -182,15 +182,15 @@ sleep_func (char **argv)
return 1;
}
char *endptr = NULL;
- long sec = strtol (argv[0], &endptr, 0);
+ double sec = strtod (argv[0], &endptr);
if (endptr == argv[0] || errno == ERANGE || sec < 0)
{
fprintf (stderr, "sleep: invalid time interval '%s'\n", argv[0]);
return 1;
}
- if (sleep (sec) < 0)
+ if (usleep ((useconds_t)(sec * 1000000.0)) < 0)
{
- fprintf (stderr, "sleep: failed to nanosleep\n");
+ fprintf (stderr, "sleep: failed to usleep: %s\n", strerror (errno));
return 1;
}
return 0;

View File

@ -0,0 +1,70 @@
commit 801af9fafd4689337ebf27260aa115335a0cb2bc
Author: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Date: Sat Feb 4 14:41:38 2023 +0300
gmon: Fix allocated buffer overflow (bug 29444)
The `__monstartup()` allocates a buffer used to store all the data
accumulated by the monitor.
The size of this buffer depends on the size of the internal structures
used and the address range for which the monitor is activated, as well
as on the maximum density of call instructions and/or callable functions
that could be potentially on a segment of executable code.
In particular a hash table of arcs is placed at the end of this buffer.
The size of this hash table is calculated in bytes as
p->fromssize = p->textsize / HASHFRACTION;
but actually should be
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
This results in writing beyond the end of the allocated buffer when an
added arc corresponds to a call near from the end of the monitored
address range, since `_mcount()` check the incoming caller address for
monitored range but not the intermediate result hash-like index that
uses to write into the table.
It should be noted that when the results are output to `gmon.out`, the
table is read to the last element calculated from the allocated size in
bytes, so the arcs stored outside the buffer boundary did not fall into
`gprof` for analysis. Thus this "feature" help me to found this bug
during working with https://sourceware.org/bugzilla/show_bug.cgi?id=29438
Just in case, I will explicitly note that the problem breaks the
`make test t=gmon/tst-gmon-dso` added for Bug 29438.
There, the arc of the `f3()` call disappears from the output, since in
the DSO case, the call to `f3` is located close to the end of the
monitored range.
Signed-off-by: Леонид Юрьев (Leonid Yuriev) <leo@yuriev.ru>
Another minor error seems a related typo in the calculation of
`kcountsize`, but since kcounts are smaller than froms, this is
actually to align the p->froms data.
Co-authored-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/gmon/gmon.c b/gmon/gmon.c
index dee64803ada583d7..bf76358d5b1aa2da 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -132,6 +132,8 @@ __monstartup (u_long lowpc, u_long highpc)
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
+ /* This looks like a typo, but it's here to align the p->froms
+ section. */
p->kcountsize = ROUNDUP(p->textsize / HISTFRACTION, sizeof(*p->froms));
p->hashfraction = HASHFRACTION;
p->log_hashfraction = -1;
@@ -142,7 +144,7 @@ __monstartup (u_long lowpc, u_long highpc)
instead of integer division. Precompute shift amount. */
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
}
- p->fromssize = p->textsize / HASHFRACTION;
+ p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
if (p->tolimit < MINARCS)
p->tolimit = MINARCS;

View File

@ -0,0 +1,477 @@
This patch adds the required @order directives to preserve the
GLIBC_PRIVATE ABI.
commit 31be941e4367c001b2009308839db5c67bf9dcbc
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 20:12:13 2023 +1100
gmon: improve mcount overflow handling [BZ# 27576]
When mcount overflows, no gmon.out file is generated, but no message is printed
to the user, leaving the user with no idea why, and thinking maybe there is
some bug - which is how BZ 27576 ended up being logged. Print a message to
stderr in this case so the user knows what is going on.
As a comment in sys/gmon.h acknowledges, the hardcoded MAXARCS value is too
small for some large applications, including the test case in that BZ. Rather
than increase it, add tunables to enable MINARCS and MAXARCS to be overridden
at runtime (glibc.gmon.minarcs and glibc.gmon.maxarcs). So if a user gets the
mcount overflow error, they can try increasing maxarcs (they might need to
increase minarcs too if the heuristic is wrong in their case.)
Note setting minarcs/maxarcs too large can cause monstartup to fail with an
out of memory error. If you set them large enough, it can cause an integer
overflow in calculating the buffer size. I haven't done anything to defend
against that - it would not generally be a security vulnerability, since these
tunables will be ignored in suid/sgid programs (due to the SXID_ERASE default),
and if you can set GLIBC_TUNABLES in the environment of a process, you can take
it over anyway (LD_PRELOAD, LD_LIBRARY_PATH, etc). I thought about modifying
the code of monstartup to defend against integer overflows, but doing so is
complicated, and I realise the existing code is susceptible to them even prior
to this change (e.g. try passing a pathologically large highpc argument to
monstartup), so I decided just to leave that possibility in-place.
Add a test case which demonstrates mcount overflow and the tunables.
Document the new tunables in the manual.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
manual/tunables.texi
(missing tunables downstream)
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index f11ca5b3e8b09b43..dc2999796042dbaf 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -149,4 +149,17 @@ glibc {
default: 2
}
}
+
+ gmon {
+ minarcs {
+ type: INT_32
+ minval: 50
+ default: 50
+ }
+ maxarcs {
+ type: INT_32
+ minval: 50
+ default: 1048576
+ }
+ }
}
diff --git a/gmon/Makefile b/gmon/Makefile
index d94593c9d8a882eb..54f05894d4dd8c4a 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -25,7 +25,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon
+tests = tst-sprofil tst-gmon tst-mcount-overflow
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -56,6 +56,18 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-gmon-gprof.out
endif
+CFLAGS-tst-mcount-overflow.c := -fno-omit-frame-pointer -pg
+tst-mcount-overflow-no-pie = yes
+CRT-tst-mcount-overflow := $(csu-objpfx)g$(start-installed-name)
+# Intentionally use invalid config where maxarcs<minarcs to check warning is printed
+tst-mcount-overflow-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcount-overflow.data \
+ GLIBC_TUNABLES=glibc.gmon.minarcs=51:glibc.gmon.maxarcs=50
+# Send stderr into output file because we make sure expected messages are printed
+tst-mcount-overflow-ARGS := 2>&1 1>/dev/null | cat
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcount-overflow-check.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -103,6 +115,14 @@ $(objpfx)tst-gmon.out: clean-tst-gmon-data
clean-tst-gmon-data:
rm -f $(objpfx)tst-gmon.data.*
+$(objpfx)tst-mcount-overflow.o: clean-tst-mcount-overflow-data
+clean-tst-mcount-overflow-data:
+ rm -f $(objpfx)tst-mcount-overflow.data.*
+
+$(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)tst-mcount-overflow.out
+ $(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
+ $(evaluate-test)
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index bf76358d5b1aa2da..689bf80141e559ca 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -46,6 +46,11 @@
#include <libc-internal.h>
#include <not-cancel.h>
+#if HAVE_TUNABLES
+# define TUNABLE_NAMESPACE gmon
+# include <elf/dl-tunables.h>
+#endif
+
#ifdef PIC
# include <link.h>
@@ -124,6 +129,22 @@ __monstartup (u_long lowpc, u_long highpc)
int o;
char *cp;
struct gmonparam *p = &_gmonparam;
+ long int minarcs, maxarcs;
+
+#if HAVE_TUNABLES
+ /* Read minarcs/maxarcs tunables. */
+ minarcs = TUNABLE_GET (minarcs, int32_t, NULL);
+ maxarcs = TUNABLE_GET (maxarcs, int32_t, NULL);
+ if (maxarcs < minarcs)
+ {
+ ERR("monstartup: maxarcs < minarcs, setting maxarcs = minarcs\n");
+ maxarcs = minarcs;
+ }
+#else
+ /* No tunables, we use hardcoded defaults */
+ minarcs = MINARCS;
+ maxarcs = MAXARCS;
+#endif
/*
* round lowpc and highpc to multiples of the density we're using
@@ -146,10 +167,10 @@ __monstartup (u_long lowpc, u_long highpc)
}
p->fromssize = ROUNDUP(p->textsize / HASHFRACTION, sizeof(*p->froms));
p->tolimit = p->textsize * ARCDENSITY / 100;
- if (p->tolimit < MINARCS)
- p->tolimit = MINARCS;
- else if (p->tolimit > MAXARCS)
- p->tolimit = MAXARCS;
+ if (p->tolimit < minarcs)
+ p->tolimit = minarcs;
+ else if (p->tolimit > maxarcs)
+ p->tolimit = maxarcs;
p->tossize = p->tolimit * sizeof(struct tostruct);
cp = calloc (p->kcountsize + p->fromssize + p->tossize, 1);
diff --git a/gmon/mcount.c b/gmon/mcount.c
index 9d4a1a50fa6ab21a..f7180fdb83399a14 100644
--- a/gmon/mcount.c
+++ b/gmon/mcount.c
@@ -41,6 +41,10 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#include <atomic.h>
+#include <not-cancel.h>
+#include <unistd.h>
+#define ERR(s) __write_nocancel (STDERR_FILENO, s, sizeof (s) - 1)
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -170,6 +174,7 @@ done:
return;
overflow:
p->state = GMON_PROF_ERROR;
+ ERR("mcount: call graph buffer size limit exceeded, gmon.out will not be generated\n");
return;
}
diff --git a/gmon/sys/gmon.h b/gmon/sys/gmon.h
index b4cc3b043a2aec77..af0582a3717085b5 100644
--- a/gmon/sys/gmon.h
+++ b/gmon/sys/gmon.h
@@ -111,6 +111,8 @@ extern struct __bb *__bb_head;
* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
+ *
+ * Value can be overridden at runtime by glibc.gmon.minarcs tunable.
*/
#define MINARCS 50
@@ -124,8 +126,8 @@ extern struct __bb *__bb_head;
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
- * The old value of 1<<16 wasn't high enough in practice for large C++
- * programs; will 1<<20 be adequate for long? FIXME
+ *
+ * Value can be overridden at runtime by glibc.gmon.maxarcs tunable.
*/
#define MAXARCS (1 << 20)
diff --git a/gmon/tst-mcount-overflow-check.sh b/gmon/tst-mcount-overflow-check.sh
new file mode 100644
index 0000000000000000..27eb5538fd573a6e
--- /dev/null
+++ b/gmon/tst-mcount-overflow-check.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# Test expected messages generated when mcount overflows
+# Copyright (C) 2017-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+LC_ALL=C
+export LC_ALL
+set -e
+exec 2>&1
+
+program="$1"
+
+check_msg() {
+ if ! grep -q "$1" "$program.out"; then
+ echo "FAIL: expected message not in output: $1"
+ exit 1
+ fi
+}
+
+check_msg 'monstartup: maxarcs < minarcs, setting maxarcs = minarcs'
+check_msg 'mcount: call graph buffer size limit exceeded, gmon.out will not be generated'
+
+for data_file in $1.data.*; do
+ if [ -f "$data_file" ]; then
+ echo "FAIL: expected no data files, but found $data_file"
+ exit 1
+ fi
+done
+
+echo PASS
diff --git a/gmon/tst-mcount-overflow.c b/gmon/tst-mcount-overflow.c
new file mode 100644
index 0000000000000000..06cc93ef872eb7c1
--- /dev/null
+++ b/gmon/tst-mcount-overflow.c
@@ -0,0 +1,72 @@
+/* Test program to trigger mcount overflow in profiling collection.
+ Copyright (C) 2017-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Program with sufficiently complex, yet pointless, call graph
+ that it will trigger an mcount overflow, when you set the
+ minarcs/maxarcs tunables to very low values. */
+
+#define PREVENT_TAIL_CALL asm volatile ("")
+
+/* Calls REP(n) macro 16 times, for n=0..15.
+ * You need to define REP(n) before using this.
+ */
+#define REPS \
+ REP(0) REP(1) REP(2) REP(3) REP(4) REP(5) REP(6) REP(7) \
+ REP(8) REP(9) REP(10) REP(11) REP(12) REP(13) REP(14) REP(15)
+
+/* Defines 16 leaf functions named f1_0 to f1_15 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void f1_##n (void) {};
+REPS
+#undef REP
+
+/* Calls all 16 leaf functions f1_* in succession */
+__attribute__ ((noinline, noclone, weak)) void
+f2 (void)
+{
+# define REP(n) f1_##n();
+ REPS
+# undef REP
+ PREVENT_TAIL_CALL;
+}
+
+/* Defines 16 functions named f2_0 to f2_15, which all just call f2 */
+#define REP(n) \
+ __attribute__ ((noinline, noclone, weak)) void \
+ f2_##n (void) { f2(); PREVENT_TAIL_CALL; };
+REPS
+#undef REP
+
+__attribute__ ((noinline, noclone, weak)) void
+f3 (int count)
+{
+ for (int i = 0; i < count; ++i)
+ {
+ /* Calls f1_0(), f2_0(), f1_1(), f2_1(), f3_0(), etc */
+# define REP(n) f1_##n(); f2_##n();
+ REPS
+# undef REP
+ }
+}
+
+int
+main (void)
+{
+ f3 (1000);
+ return 0;
+}
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 7b70e80391ee87f7..00eafcf44b562b9e 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -73,6 +73,9 @@ glibc.malloc.check: 0 (min: 0, max: 3)
* Elision Tunables:: Tunables in elision subsystem
* Hardware Capability Tunables:: Tunables that modify the hardware
capabilities seen by @theglibc{}
+* gmon Tunables:: Tunables that control the gmon profiler, used in
+ conjunction with gprof
+
@end menu
@node Tunable names
@@ -506,3 +509,59 @@ instead.
This tunable is specific to i386 and x86-64.
@end deftp
+
+@node gmon Tunables
+@section gmon Tunables
+@cindex gmon tunables
+
+@deftp {Tunable namespace} glibc.gmon
+This tunable namespace affects the behaviour of the gmon profiler.
+gmon is a component of @theglibc{} which is normally used in
+conjunction with gprof.
+
+When GCC compiles a program with the @code{-pg} option, it instruments
+the program with calls to the @code{mcount} function, to record the
+program's call graph. At program startup, a memory buffer is allocated
+to store this call graph; the size of the buffer is calculated using a
+heuristic based on code size. If during execution, the buffer is found
+to be too small, profiling will be aborted and no @file{gmon.out} file
+will be produced. In that case, you will see the following message
+printed to standard error:
+
+@example
+mcount: call graph buffer size limit exceeded, gmon.out will not be generated
+@end example
+
+Most of the symbols discussed in this section are defined in the header
+@code{sys/gmon.h}. However, some symbols (for example @code{mcount})
+are not defined in any header file, since they are only intended to be
+called from code generated by the compiler.
+@end deftp
+
+@deftp Tunable glibc.mem.minarcs
+The heuristic for sizing the call graph buffer is known to be
+insufficient for small programs; hence, the calculated value is clamped
+to be at least a minimum size. The default minimum (in units of
+call graph entries, @code{struct tostruct}), is given by the macro
+@code{MINARCS}. If you have some program with an unusually complex
+call graph, for which the heuristic fails to allocate enough space,
+you can use this tunable to increase the minimum to a larger value.
+@end deftp
+
+@deftp Tunable glibc.mem.maxarcs
+To prevent excessive memory consumption when profiling very large
+programs, the call graph buffer is allowed to have a maximum of
+@code{MAXARCS} entries. For some very large programs, the default
+value of @code{MAXARCS} defined in @file{sys/gmon.h} is too small; in
+that case, you can use this tunable to increase it.
+
+Note the value of the @code{maxarcs} tunable must be greater or equal
+to that of the @code{minarcs} tunable; if this constraint is violated,
+a warning will printed to standard error at program startup, and
+the @code{minarcs} value will be used as the maximum as well.
+
+Setting either tunable too high may result in a call graph buffer
+whose size exceeds the available memory; in that case, an out of memory
+error will be printed at program startup, the profiler will be
+disabled, and no @file{gmon.out} file will be generated.
+@end deftp
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
index 5c3c5292025607a1..265f82ef2be42fd0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-tunables.list
@@ -24,3 +24,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/i386/dl-tunables.list b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
--- a/sysdeps/unix/sysv/linux/i386/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/i386/dl-tunables.list
@@ -31,3 +31,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
index ee1e6fca95e1f2da..c8bb1a8ec0283ac8 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/dl-tunables.list
@@ -24,3 +24,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
index 099e28d8f8e67944..85b3a014ffcadc45 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/dl-tunables.list
@@ -23,3 +23,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
index b9cad4af62d9f2e5..9c1ccb86501c61e7 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
+++ b/sysdeps/unix/sysv/linux/x86_64/64/dl-tunables.list
@@ -31,3 +31,7 @@
# Tunables added in RHEL 8.8.0
@order glibc.rtld.dynamic_sort
+
+# Tunables added in RHEL 8.9.0
+@order glibc.gmon.minarcs
+@order glibc.gmon.maxarcs

View File

@ -0,0 +1,191 @@
commit bde121872001d8f3224eeafa5b7effb871c3fbca
Author: Simon Kissane <skissane@gmail.com>
Date: Sat Feb 11 08:58:02 2023 +1100
gmon: fix memory corruption issues [BZ# 30101]
V2 of this patch fixes an issue in V1, where the state was changed to ON not
OFF at end of _mcleanup. I hadn't noticed that (counterintuitively) ON=0 and
OFF=3, hence zeroing the buffer turned it back on. So set the state to OFF
after the memset.
1. Prevent double free, and reads from unallocated memory, when
_mcleanup is (incorrectly) called two or more times in a row,
without an intervening call to __monstartup; with this patch, the
second and subsequent calls effectively become no-ops instead.
While setting tos=NULL is minimal fix, safest action is to zero the
whole gmonparam buffer.
2. Prevent memory leak when __monstartup is (incorrectly) called two
or more times in a row, without an intervening call to _mcleanup;
with this patch, the second and subsequent calls effectively become
no-ops instead.
3. After _mcleanup, treat __moncontrol(1) as __moncontrol(0) instead.
With zeroing of gmonparam buffer in _mcleanup, this stops the
state incorrectly being changed to GMON_PROF_ON despite profiling
actually being off. If we'd just done the minimal fix to _mcleanup
of setting tos=NULL, there is risk of far worse memory corruption:
kcount would point to deallocated memory, and the __profil syscall
would make the kernel write profiling data into that memory,
which could have since been reallocated to something unrelated.
4. Ensure __moncontrol(0) still turns off profiling even in error
state. Otherwise, if mcount overflows and sets state to
GMON_PROF_ERROR, when _mcleanup calls __moncontrol(0), the __profil
syscall to disable profiling will not be invoked. _mcleanup will
free the buffer, but the kernel will still be writing profiling
data into it, potentially corrupted arbitrary memory.
Also adds a test case for (1). Issues (2)-(4) are not feasible to test.
Signed-off-by: Simon Kissane <skissane@gmail.com>
Reviewed-by: DJ Delorie <dj@redhat.com>
Conflicts:
gmon/Makefile
(copyright year update)
diff --git a/gmon/Makefile b/gmon/Makefile
index 54f05894d4dd8c4a..1bc4ad6e14e292a9 100644
--- a/gmon/Makefile
+++ b/gmon/Makefile
@@ -1,4 +1,5 @@
-# Copyright (C) 1995-2018 Free Software Foundation, Inc.
+# Copyright (C) 1995-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -25,7 +26,7 @@ include ../Makeconfig
headers := sys/gmon.h sys/gmon_out.h sys/profil.h
routines := gmon mcount profil sprofil prof-freq
-tests = tst-sprofil tst-gmon tst-mcount-overflow
+tests = tst-sprofil tst-gmon tst-mcount-overflow tst-mcleanup
ifeq ($(build-profile),yes)
tests += tst-profile-static
tests-static += tst-profile-static
@@ -68,6 +69,14 @@ ifeq ($(run-built-tests),yes)
tests-special += $(objpfx)tst-mcount-overflow-check.out
endif
+CFLAGS-tst-mcleanup.c := -fno-omit-frame-pointer -pg
+tst-mcleanup-no-pie = yes
+CRT-tst-mcleanup := $(csu-objpfx)g$(start-installed-name)
+tst-mcleanup-ENV := GMON_OUT_PREFIX=$(objpfx)tst-mcleanup.data
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-mcleanup.out
+endif
+
CFLAGS-tst-gmon-static.c := $(PIE-ccflag) -fno-omit-frame-pointer -pg
CRT-tst-gmon-static := $(csu-objpfx)gcrt1.o
tst-gmon-static-no-pie = yes
@@ -123,6 +132,10 @@ $(objpfx)tst-mcount-overflow-check.out: tst-mcount-overflow-check.sh $(objpfx)ts
$(SHELL) $< $(objpfx)tst-mcount-overflow > $@; \
$(evaluate-test)
+$(objpfx)tst-mcleanup.out: clean-tst-mcleanup-data
+clean-tst-mcleanup-data:
+ rm -f $(objpfx)tst-mcleanup.data.*
+
$(objpfx)tst-gmon-gprof.out: tst-gmon-gprof.sh $(objpfx)tst-gmon.out
$(SHELL) $< $(GPROF) $(objpfx)tst-gmon $(objpfx)tst-gmon.data.* > $@; \
$(evaluate-test)
diff --git a/gmon/gmon.c b/gmon/gmon.c
index 689bf80141e559ca..5e99a7351dc71666 100644
--- a/gmon/gmon.c
+++ b/gmon/gmon.c
@@ -102,11 +102,8 @@ __moncontrol (int mode)
{
struct gmonparam *p = &_gmonparam;
- /* Don't change the state if we ran into an error. */
- if (p->state == GMON_PROF_ERROR)
- return;
-
- if (mode)
+ /* Treat start request as stop if error or gmon not initialized. */
+ if (mode && p->state != GMON_PROF_ERROR && p->tos != NULL)
{
/* start */
__profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
@@ -116,7 +113,9 @@ __moncontrol (int mode)
{
/* stop */
__profil(NULL, 0, 0, 0);
- p->state = GMON_PROF_OFF;
+ /* Don't change the state if we ran into an error. */
+ if (p->state != GMON_PROF_ERROR)
+ p->state = GMON_PROF_OFF;
}
}
libc_hidden_def (__moncontrol)
@@ -146,6 +145,14 @@ __monstartup (u_long lowpc, u_long highpc)
maxarcs = MAXARCS;
#endif
+ /*
+ * If we are incorrectly called twice in a row (without an
+ * intervening call to _mcleanup), ignore the second call to
+ * prevent leaking memory.
+ */
+ if (p->tos != NULL)
+ return;
+
/*
* round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
@@ -463,9 +470,14 @@ _mcleanup (void)
{
__moncontrol (0);
- if (_gmonparam.state != GMON_PROF_ERROR)
+ if (_gmonparam.state != GMON_PROF_ERROR && _gmonparam.tos != NULL)
write_gmon ();
/* free the memory. */
free (_gmonparam.tos);
+
+ /* reset buffer to initial state for safety */
+ memset(&_gmonparam, 0, sizeof _gmonparam);
+ /* somewhat confusingly, ON=0, OFF=3 */
+ _gmonparam.state = GMON_PROF_OFF;
}
diff --git a/gmon/tst-mcleanup.c b/gmon/tst-mcleanup.c
new file mode 100644
index 0000000000000000..b259653ec833aca4
--- /dev/null
+++ b/gmon/tst-mcleanup.c
@@ -0,0 +1,31 @@
+/* Test program for repeated invocation of _mcleanup
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Intentionally calls _mcleanup() twice: once manually, it will be
+ called again as an atexit handler. This is incorrect use of the API,
+ but the point of the test is to make sure we don't crash when the
+ API is misused in this way. */
+
+#include <sys/gmon.h>
+
+int
+main (void)
+{
+ _mcleanup();
+ return 0;
+}

View File

@ -0,0 +1,216 @@
From af992e7abdc9049714da76cae1e5e18bc4838fb8 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed, 7 Jun 2023 13:18:01 -0500
Subject: [PATCH] x86: Increase `non_temporal_threshold` to roughly `sizeof_L3
/ 4`
Content-type: text/plain; charset=UTF-8
Current `non_temporal_threshold` set to roughly '3/4 * sizeof_L3 /
ncores_per_socket'. This patch updates that value to roughly
'sizeof_L3 / 4`
The original value (specifically dividing the `ncores_per_socket`) was
done to limit the amount of other threads' data a `memcpy`/`memset`
could evict.
Dividing by 'ncores_per_socket', however leads to exceedingly low
non-temporal thresholds and leads to using non-temporal stores in
cases where REP MOVSB is multiple times faster.
Furthermore, non-temporal stores are written directly to main memory
so using it at a size much smaller than L3 can place soon to be
accessed data much further away than it otherwise could be. As well,
modern machines are able to detect streaming patterns (especially if
REP MOVSB is used) and provide LRU hints to the memory subsystem. This
in affect caps the total amount of eviction at 1/cache_associativity,
far below meaningfully thrashing the entire cache.
As best I can tell, the benchmarks that lead this small threshold
where done comparing non-temporal stores versus standard cacheable
stores. A better comparison (linked below) is to be REP MOVSB which,
on the measure systems, is nearly 2x faster than non-temporal stores
at the low-end of the previous threshold, and within 10% for over
100MB copies (well past even the current threshold). In cases with a
low number of threads competing for bandwidth, REP MOVSB is ~2x faster
up to `sizeof_L3`.
The divisor of `4` is a somewhat arbitrary value. From benchmarks it
seems Skylake and Icelake both prefer a divisor of `2`, but older CPUs
such as Broadwell prefer something closer to `8`. This patch is meant
to be followed up by another one to make the divisor cpu-specific, but
in the meantime (and for easier backporting), this patch settles on
`4` as a middle-ground.
Benchmarks comparing non-temporal stores, REP MOVSB, and cacheable
stores where done using:
https://github.com/goldsteinn/memcpy-nt-benchmarks
Sheets results (also available in pdf on the github):
https://docs.google.com/spreadsheets/d/e/2PACX-1vS183r0rW_jRX6tG_E90m9qVuFiMbRIJvi5VAE8yYOvEOIEEc3aSNuEsrFbuXw5c3nGboxMmrupZD7K/pubhtml
Reviewed-by: DJ Delorie <dj@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 70 +++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 27 deletions(-)
[DJ - ported to C8S]
diff -rup a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
--- a/sysdeps/x86/cacheinfo.h 2023-08-08 11:54:09.969791421 -0400
+++ b/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
@@ -46,7 +46,7 @@ long int __x86_rep_movsb_threshold attri
long int __x86_rep_stosb_threshold attribute_hidden = 2048;
static void
-get_common_cache_info (long int *shared_ptr, unsigned int *threads_ptr,
+get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, unsigned int *threads_ptr,
long int core)
{
unsigned int eax;
@@ -65,6 +65,7 @@ get_common_cache_info (long int *shared_
unsigned int family = cpu_features->basic.family;
unsigned int model = cpu_features->basic.model;
long int shared = *shared_ptr;
+ long int shared_per_thread = *shared_per_thread_ptr;
unsigned int threads = *threads_ptr;
bool inclusive_cache = true;
bool support_count_mask = true;
@@ -80,6 +81,7 @@ get_common_cache_info (long int *shared_
/* Try L2 otherwise. */
level = 2;
shared = core;
+ shared_per_thread = core;
threads_l2 = 0;
threads_l3 = -1;
}
@@ -236,29 +238,28 @@ get_common_cache_info (long int *shared_
}
else
{
-intel_bug_no_cache_info:
- /* Assume that all logical threads share the highest cache
- level. */
- threads
- = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx
- >> 16) & 0xff);
- }
-
- /* Cap usage of highest cache level to the number of supported
- threads. */
- if (shared > 0 && threads > 0)
- shared /= threads;
+ intel_bug_no_cache_info:
+ /* Assume that all logical threads share the highest cache
+ level. */
+ threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
+ & 0xff);
+
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
+ }
}
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
if (threads_l2 > 0)
- core /= threads_l2;
+ shared_per_thread += core / threads_l2;
shared += core;
}
*shared_ptr = shared;
+ *shared_per_thread_ptr = shared_per_thread;
*threads_ptr = threads;
}
@@ -272,6 +273,7 @@ init_cacheinfo (void)
int max_cpuid_ex;
long int data = -1;
long int shared = -1;
+ long int shared_per_thread = -1;
long int core;
unsigned int threads = 0;
const struct cpu_features *cpu_features = __get_cpu_features ();
@@ -287,22 +289,25 @@ init_cacheinfo (void)
data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);
+ shared_per_thread = shared;
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_zhaoxin)
{
data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE);
core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE);
shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE);
+ shared_per_thread = shared;
- get_common_cache_info (&shared, &threads, core);
+ get_common_cache_info (&shared, &shared_per_thread, &threads, core);
}
else if (cpu_features->basic.kind == arch_kind_amd)
{
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
+ shared_per_thread = shared;
/* Get maximum extended function. */
__cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
@@ -352,6 +357,9 @@ init_cacheinfo (void)
shared += core;
}
}
+
+ if (shared_per_thread <= 0)
+ shared_per_thread = shared;
}
if (cpu_features->data_cache_size != 0)
@@ -380,20 +388,30 @@ init_cacheinfo (void)
__x86_shared_cache_size = shared;
}
- /* The default setting for the non_temporal threshold is 3/4 of one
- thread's share of the chip's cache. For most Intel and AMD processors
- with an initial release date between 2017 and 2020, a thread's typical
- share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
- threshold leaves 125 KBytes to 500 KBytes of the thread's data
- in cache after a maximum temporal copy, which will maintain
- in cache a reasonable portion of the thread's stack and other
- active data. If the threshold is set higher than one thread's
- share of the cache, it has a substantial risk of negatively
- impacting the performance of other threads running on the chip. */
+ /* The default setting for the non_temporal threshold is 1/4 of size
+ of the chip's cache. For most Intel and AMD processors with an
+ initial release date between 2017 and 2023, a thread's typical
+ share of the cache is from 18-64MB. Using the 1/4 L3 is meant to
+ estimate the point where non-temporal stores begin out-competing
+ REP MOVSB. As well the point where the fact that non-temporal
+ stores are forced back to main memory would already occurred to the
+ majority of the lines in the copy. Note, concerns about the
+ entire L3 cache being evicted by the copy are mostly alleviated
+ by the fact that modern HW detects streaming patterns and
+ provides proper LRU hints so that the maximum thrashing
+ capped at 1/associativity. */
+ unsigned long int non_temporal_threshold = shared / 4;
+ /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
+ a higher risk of actually thrashing the cache as they don't have a HW LRU
+ hint. As well, their performance in highly parallel situations is
+ noticeably worse. */
+ if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+ non_temporal_threshold = shared_per_thread * 3 / 4;
+
__x86_shared_non_temporal_threshold
= (cpu_features->non_temporal_threshold != 0
? cpu_features->non_temporal_threshold
- : __x86_shared_cache_size * 3 / 4);
+ : non_temporal_threshold);
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
unsigned int minimum_rep_movsb_threshold;
Only in b/sysdeps/x86: cacheinfo.h~

View File

@ -0,0 +1,47 @@
From 47f747217811db35854ea06741be3685e8bbd44d Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Mon, 17 Jul 2023 23:14:33 -0500
Subject: [PATCH] x86: Fix slight bug in `shared_per_thread` cache size
calculation.
Content-type: text/plain; charset=UTF-8
After:
```
commit af992e7abdc9049714da76cae1e5e18bc4838fb8
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Wed Jun 7 13:18:01 2023 -0500
x86: Increase `non_temporal_threshold` to roughly `sizeof_L3 / 4`
```
Split `shared` (cumulative cache size) from `shared_per_thread` (cache
size per socket), the `shared_per_thread` *can* be slightly off from
the previous calculation.
Previously we added `core` even if `threads_l2` was invalid, and only
used `threads_l2` to divide `core` if it was present. The changed
version only included `core` if `threads_l2` was valid.
This change restores the old behavior if `threads_l2` is invalid by
adding the entire value of `core`.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
[DJ - ported to C8S]
diff -rup b1/sysdeps/x86/cacheinfo.h b2/sysdeps/x86/cacheinfo.h
--- b1/sysdeps/x86/cacheinfo.h 2023-08-08 13:44:55.185333601 -0400
+++ b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
@@ -253,8 +253,8 @@ get_common_cache_info (long int *shared_
/* Account for non-inclusive L2 and L3 caches. */
if (!inclusive_cache)
{
- if (threads_l2 > 0)
- shared_per_thread += core / threads_l2;
+ long int core_per_thread = threads_l2 > 0 ? (core / threads_l2) : core;
+ shared_per_thread += core_per_thread;
shared += core;
}

View File

@ -0,0 +1,44 @@
From 8b9a0af8ca012217bf90d1dc0694f85b49ae09da Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Tue, 18 Jul 2023 10:27:59 -0500
Subject: [PATCH] [PATCH v1] x86: Use `3/4*sizeof(per-thread-L3)` as low bound
for NT threshold.
Content-type: text/plain; charset=UTF-8
On some machines we end up with incomplete cache information. This can
make the new calculation of `sizeof(total-L3)/custom-divisor` end up
lower than intended (and lower than the prior value). So reintroduce
the old bound as a lower bound to avoid potentially regressing code
where we don't have complete information to make the decision.
Reviewed-by: DJ Delorie <dj@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
[DJ - ported to C8S]
diff -rup b2/sysdeps/x86/cacheinfo.h b3/sysdeps/x86/cacheinfo.h
--- b2/sysdeps/x86/cacheinfo.h 2023-08-08 13:55:16.474680016 -0400
+++ b3/sysdeps/x86/cacheinfo.h 2023-08-08 13:59:14.507988958 -0400
@@ -401,12 +401,20 @@ init_cacheinfo (void)
provides proper LRU hints so that the maximum thrashing
capped at 1/associativity. */
unsigned long int non_temporal_threshold = shared / 4;
+ /* If the computed non_temporal_threshold <= 3/4 * per-thread L3, we most
+ likely have incorrect/incomplete cache info in which case, default to
+ 3/4 * per-thread L3 to avoid regressions. */
+ unsigned long int non_temporal_threshold_lowbound
+ = shared_per_thread * 3 / 4;
+ if (non_temporal_threshold < non_temporal_threshold_lowbound)
+ non_temporal_threshold = non_temporal_threshold_lowbound;
+
/* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
a higher risk of actually thrashing the cache as they don't have a HW LRU
hint. As well, their performance in highly parallel situations is
noticeably worse. */
if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
- non_temporal_threshold = shared_per_thread * 3 / 4;
+ non_temporal_threshold = non_temporal_threshold_lowbound;
__x86_shared_non_temporal_threshold
= (cpu_features->non_temporal_threshold != 0

View File

@ -0,0 +1,39 @@
Adjusted for backport to c8s by modifying sysdeps/x86/cacheinfo.h.
commit 885a7f0feee951f514a121788f46f33b2867110f
Author: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri Aug 11 12:29:11 2023 -0500
x86: Fix incorrect scope of setting `shared_per_thread` [BZ# 30745]
The:
```
if (shared_per_thread > 0 && threads > 0)
shared_per_thread /= threads;
```
Code was accidentally moved to inside the else scope. This doesn't
match how it was previously (before af992e7abd).
This patch fixes that by putting the division after the `else` block.
diff --git a/sysdeps/x86/cacheinfo.h b/sysdeps/x86/cacheinfo.h
index 4dbfa979ef052eaa..e53fa25106c95253 100644
--- a/sysdeps/x86/cacheinfo.h
+++ b/sysdeps/x86/cacheinfo.h
@@ -243,11 +243,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
level. */
threads = ((cpu_features->features[COMMON_CPUID_INDEX_1].cpuid.ebx >> 16)
& 0xff);
-
- /* Get per-thread size of highest level cache. */
- if (shared_per_thread > 0 && threads > 0)
- shared_per_thread /= threads;
}
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
}
/* Account for non-inclusive L2 and L3 caches. */

View File

@ -0,0 +1,35 @@
commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Apr 3 17:23:11 2023 +0200
x86_64: Fix asm constraints in feraiseexcept (bug 30305)
The divss instruction clobbers its first argument, and the constraints
need to reflect that. Fortunately, with GCC 12, generated code does
not actually change, so there is no externally visible bug.
Suggested-by: Jakub Jelinek <jakub@redhat.com>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index ca1c223053bf016b..fb886ed540b52100 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -33,7 +33,7 @@ __feraiseexcept (int excepts)
/* One example of an invalid operation is 0.0 / 0.0. */
float f = 0.0;
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (f));
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (f));
(void) &f;
}
@@ -43,7 +43,7 @@ __feraiseexcept (int excepts)
float f = 1.0;
float g = 0.0;
- __asm__ __volatile__ ("divss %1, %0" : : "x" (f), "x" (g));
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (f) : "x" (g));
(void) &f;
}

View File

@ -0,0 +1,28 @@
Apply a fix similar to upstream commit 5d1ccdda7b0c625751661d50977f3dfbc73f8eae
to the installed header file. Upstream, the header file has been removed
in its uncorrected state, so there is no upstream fix to backport.
Suggested by Jakub Jelinek.
diff --git a/sysdeps/x86/fpu/bits/fenv.h b/sysdeps/x86/fpu/bits/fenv.h
index 4103982d8c8ae014..4ae2d2a04c6754bd 100644
--- a/sysdeps/x86/fpu/bits/fenv.h
+++ b/sysdeps/x86/fpu/bits/fenv.h
@@ -132,7 +132,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
float __f = 0.0;
# ifdef __SSE_MATH__
- __asm__ __volatile__ ("divss %0, %0 " : : "x" (__f));
+ __asm__ __volatile__ ("divss %0, %0 " : "+x" (__f));
# else
__asm__ __volatile__ ("fdiv %%st, %%st(0); fwait"
: "=t" (__f) : "0" (__f));
@@ -145,7 +145,7 @@ __NTH (__feraiseexcept_invalid_divbyzero (int __excepts))
float __g = 0.0;
# ifdef __SSE_MATH__
- __asm__ __volatile__ ("divss %1, %0" : : "x" (__f), "x" (__g));
+ __asm__ __volatile__ ("divss %1, %0" : "+x" (__f) : "x" (__g));
# else
__asm__ __volatile__ ("fdivp %%st, %%st(1); fwait"
: "=t" (__f) : "0" (__f), "u" (__g) : "st(1)");

View File

@ -0,0 +1,46 @@
Only backport po/it.po and po/ja.po changes for the ESTALE message
translation which we use during CI testing.
commit 7ff33eca6860648fb909df954da4996ce853d01d
Author: Carlos O'Donell <carlos@redhat.com>
Date: Fri Jul 7 11:27:08 2023 -0400
Translations: Add new ro support and update others.
This brings in the new Romanian language translations, and updates
nine other translations. Important translations in this update
include the Italian and Japanese translations for ESTALE which
remove the mention of "NFS" from the error message translation.
diff --git a/po/it.po b/po/it.po
index 2750575a1082f1db..6c2be3a4df5611ff 100644
--- a/po/it.po
+++ b/po/it.po
@@ -6793,10 +6793,8 @@ msgstr "Quota disco superata"
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
#. TRANS and remounting the file system.
#: sysdeps/gnu/errlist.c:788
-#, fuzzy
-#| msgid "Stale NFS file handle"
msgid "Stale file handle"
-msgstr "Gestione del file NFS interrotta"
+msgstr "Riferimento al file obsoleto"
# lf
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
diff --git a/po/ja.po b/po/ja.po
index bd9b7ffbbd3e3bf6..8fb598c5edbc5891 100644
--- a/po/ja.po
+++ b/po/ja.po
@@ -6360,10 +6360,8 @@ msgstr "ディスク使用量制限を超過しました"
#. TRANS Repairing this condition usually requires unmounting, possibly repairing
#. TRANS and remounting the file system.
#: sysdeps/gnu/errlist.c:788
-#, fuzzy
-#| msgid "Stale NFS file handle"
msgid "Stale file handle"
-msgstr "実効性のないNFSファイルハンドルです"
+msgstr "古いファイルハンドルです"
#. TRANS An attempt was made to NFS-mount a remote file system with a file name that
#. TRANS already specifies an NFS-mounted file.

View File

@ -0,0 +1,26 @@
From abcf8db7fa46b73fd5b8193ce11f9312301b84c7 Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Wed, 7 Jun 2023 11:21:48 +0200
Subject: resolv_conf: release lock on allocation failure (bug 30527)
When the initial allocation of global fails, the local lock is left
locked.
Reported by Steffen Lammel of SAP HANA development.
diff --git a/resolv/resolv_conf.c b/resolv/resolv_conf.c
index bd5890773b..8bc9edc634 100644
--- a/resolv/resolv_conf.c
+++ b/resolv/resolv_conf.c
@@ -93,7 +93,10 @@ get_locked_global (void)
{
global_copy = calloc (1, sizeof (*global));
if (global_copy == NULL)
- return NULL;
+ {
+ __libc_lock_unlock (lock);
+ return NULL;
+ }
atomic_store_relaxed (&global, global_copy);
resolv_conf_array_init (&global_copy->array);
}

View File

@ -0,0 +1,27 @@
commit 0fda2a41baf7e978d07322aa278e964f4dce8802
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Jul 20 18:31:48 2023 +0200
debug: Mark libSegFault.so as NODELETE
The signal handler installed in the ELF constructor cannot easily
be removed again (because the program may have changed handlers
in the meantime). Mark the object as NODELETE so that the registered
handler function is never unloaded.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 23ee92deea4c99d0e6a5f48fa7b942909b123ec5)
diff --git a/debug/Makefile b/debug/Makefile
index b0f0b7beb6d5cef5..8bce89ddcd0a61ed 100644
--- a/debug/Makefile
+++ b/debug/Makefile
@@ -213,6 +213,8 @@ extra-libs-others = $(extra-libs)
libSegFault-routines = segfault
libSegFault-inhibit-o = $(filter-out .os,$(object-suffixes))
+# libSegFault.so installs a signal handler in its ELF constructor.
+LDFLAGS-SegFault.so = -Wl,--enable-new-dtags,-z,nodelete
libpcprofile-routines = pcprofile
libpcprofile-inhibit-o = $(filter-out .os,$(object-suffixes))

View File

@ -1,6 +1,6 @@
%define glibcsrcdir glibc-2.28
%define glibcversion 2.28
%define glibcrelease 225%{?dist}
%define glibcrelease 236%{?dist}
# Pre-release tarballs are pulled in from git using a command that is
# effectively:
#
@ -132,7 +132,7 @@ end \
Summary: The GNU libc libraries
Name: glibc
Version: %{glibcversion}
Release: %{glibcrelease}.6
Release: %{glibcrelease}.7
# In general, GPLv2+ is used by programs, LGPLv2+ is used for
# libraries.
@ -1031,11 +1031,28 @@ Patch838: glibc-rh2142937-3.patch
Patch839: glibc-rh2144568.patch
Patch840: glibc-rh2154914-1.patch
Patch841: glibc-rh2154914-2.patch
# (Reverted fixes for rh2237433 were here.)
Patch848: glibc-rh2234713.patch
Patch849: glibc-RHEL-2434.patch
Patch850: glibc-RHEL-2422.patch
Patch851: glibc-RHEL-3035.patch
Patch842: glibc-rh2183081-1.patch
Patch843: glibc-rh2183081-2.patch
Patch844: glibc-rh2172949.patch
Patch845: glibc-rh2180155-1.patch
Patch846: glibc-rh2180155-2.patch
Patch847: glibc-rh2180155-3.patch
Patch848: glibc-rh2213909.patch
Patch849: glibc-rh2176707-1.patch
Patch850: glibc-rh2176707-2.patch
Patch851: glibc-rh2186781.patch