Overwrite target for x86_64_v2

Update patch-git.lua to handle AlmaLinux branches correctly

Add support for AlmaLinux import UBI format
This commit is contained in:
Eduard Abdullin 2026-05-19 19:10:20 +00:00 committed by root
commit 27aeab8258
235 changed files with 158007 additions and 5706 deletions

81
glibc-RHEL-111115-1.patch Normal file
View File

@ -0,0 +1,81 @@
commit 84373ef7b72c9c8ab61ce1fdfd798777715a1a52
Author: Frédéric Bérat <fberat@redhat.com>
Date: Fri Mar 7 14:42:26 2025 +0100
Prepare inet_ntop to be fortified
Rename inet_ntop to __inet_ntop and create the inet_ntop weak alias
based on it in order to prepare for disabling fortification when
available.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index df9472ba3bc52689..d1ea13bb19d4a497 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -5,7 +5,9 @@
extern int __inet_aton_exact (const char *__cp, struct in_addr *__inp);
libc_hidden_proto (__inet_aton_exact)
-libc_hidden_proto (inet_ntop)
+extern __typeof (inet_ntop) __inet_ntop;
+libc_hidden_proto (__inet_ntop)
+
libc_hidden_proto (inet_pton)
extern __typeof (inet_pton) __inet_pton;
libc_hidden_proto (__inet_pton)
diff --git a/nss/getnameinfo.c b/nss/getnameinfo.c
index 889c0a35d29b676e..36c5401a7645d7d2 100644
--- a/nss/getnameinfo.c
+++ b/nss/getnameinfo.c
@@ -338,7 +338,7 @@ gni_host_inet_numeric (struct scratch_buffer *tmpbuf,
if (sa->sa_family == AF_INET6)
{
const struct sockaddr_in6 *sin6p = (const struct sockaddr_in6 *) sa;
- if (inet_ntop (AF_INET6, &sin6p->sin6_addr, host, hostlen) == NULL)
+ if (__inet_ntop (AF_INET6, &sin6p->sin6_addr, host, hostlen) == NULL)
return EAI_OVERFLOW;
uint32_t scopeid = sin6p->sin6_scope_id;
@@ -365,7 +365,7 @@ gni_host_inet_numeric (struct scratch_buffer *tmpbuf,
else
{
const struct sockaddr_in *sinp = (const struct sockaddr_in *) sa;
- if (inet_ntop (AF_INET, &sinp->sin_addr, host, hostlen) == NULL)
+ if (__inet_ntop (AF_INET, &sinp->sin_addr, host, hostlen) == NULL)
return EAI_OVERFLOW;
}
return 0;
diff --git a/resolv/inet_ntop.c b/resolv/inet_ntop.c
index c4d38c0f951013e5..acf5f3cb885e2e47 100644
--- a/resolv/inet_ntop.c
+++ b/resolv/inet_ntop.c
@@ -42,7 +42,7 @@ static const char *inet_ntop4 (const u_char *src, char *dst, socklen_t size);
static const char *inet_ntop6 (const u_char *src, char *dst, socklen_t size);
/* char *
- * inet_ntop(af, src, dst, size)
+ * __inet_ntop(af, src, dst, size)
* convert a network format address to presentation format.
* return:
* pointer to presentation format address (`dst'), or NULL (see errno).
@@ -50,7 +50,7 @@ static const char *inet_ntop6 (const u_char *src, char *dst, socklen_t size);
* Paul Vixie, 1996.
*/
const char *
-inet_ntop (int af, const void *src, char *dst, socklen_t size)
+__inet_ntop (int af, const void *src, char *dst, socklen_t size)
{
switch (af) {
case AF_INET:
@@ -63,7 +63,8 @@ inet_ntop (int af, const void *src, char *dst, socklen_t size)
}
/* NOTREACHED */
}
-libc_hidden_def (inet_ntop)
+libc_hidden_def (__inet_ntop)
+weak_alias (__inet_ntop, inet_ntop)
/* const char *
* inet_ntop4(src, dst, size)

26
glibc-RHEL-111115-2.patch Normal file
View File

@ -0,0 +1,26 @@
commit 3cdb99d8bb9d0008b2b297080e61d6c10dd66cc8
Author: Frédéric Bérat <fberat@redhat.com>
Date: Tue Mar 11 10:40:11 2025 +0100
Add missing guards in include/arpa/inet.h
Add the missing guards in the header, similarly to other headers at the
same level
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index d1ea13bb19d4a497..d9e55a3c7f2db9f2 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -1,3 +1,5 @@
+#ifndef _ARPA_INET_H
+/* Note: _ARPA_INET_H is defined by inet/arpa/inet.h below. */
#include <inet/arpa/inet.h>
#ifndef _ISOMAC
@@ -17,3 +19,4 @@ libc_hidden_proto (inet_netof)
extern __typeof (inet_network) __inet_network;
libc_hidden_proto (__inet_network)
#endif
+#endif

318
glibc-RHEL-111115-3.patch Normal file
View File

@ -0,0 +1,318 @@
Partial backport (without ABI changes, using libc_nonshared.a instead)
of:
commit 090dfa40a5e46f7c0e4d6e8369bcbbd51267625f
Author: Frédéric Bérat <fberat@redhat.com>
Date: Fri Mar 7 18:16:30 2025 +0100
Add _FORTIFY_SOURCE support for inet_ntop
- Create the __inet_ntop_chk routine that verifies that the builtin size
of the destination buffer is at least as big as the size given by the
user.
- Redirect calls from inet_ntop to __inet_ntop_chk or __inet_ntop_chk_warn
- Update the abilist (Dropped) for this new routine
- Update the manual to mention the new fortification
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Conflicts:
debug/inet_ntop_chk.c
(attribute_hidden added, use inet_ntop instead of __inet_ntop)
debug/Makefile
(Routine added to static-only-routines)
debug/Versions
(Dropped)
inet/bits/inet-fortified.h
(removed attribute_overloadable and clang specific handling)
sysdeps/mach/hurd/i386/libc.abilist
sysdeps/mach/hurd/x86_64/libc.abilist
sysdeps/unix/sysv/linux/aarch64/libc.abilist
sysdeps/unix/sysv/linux/alpha/libc.abilist
sysdeps/unix/sysv/linux/arc/libc.abilist
sysdeps/unix/sysv/linux/arm/be/libc.abilist
sysdeps/unix/sysv/linux/arm/le/libc.abilist
sysdeps/unix/sysv/linux/csky/libc.abilist
sysdeps/unix/sysv/linux/hppa/libc.abilist
sysdeps/unix/sysv/linux/i386/libc.abilist
sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
sysdeps/unix/sysv/linux/or1k/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
sysdeps/unix/sysv/linux/sh/be/libc.abilist
sysdeps/unix/sysv/linux/sh/le/libc.abilist
sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
(Dropped)
diff --git a/debug/Makefile b/debug/Makefile
index 76c311d2845df9c1..db9a400711a2ce91 100644
--- a/debug/Makefile
+++ b/debug/Makefile
@@ -119,7 +119,10 @@ routines = \
wmemset_chk \
wprintf_chk \
# routines
-static-only-routines := stack_chk_fail_local
+static-only-routines := \
+ inet_ntop_chk \
+ stack_chk_fail_local \
+ # static-only-routines
# Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local
# is an alias of __stack_chk_fail in stack_chk_fail.o.
diff --git a/debug/inet_ntop_chk.c b/debug/inet_ntop_chk.c
new file mode 100644
index 0000000000000000..8a3994dd3fc9bfe4
--- /dev/null
+++ b/debug/inet_ntop_chk.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <arpa/inet.h>
+#include <stdio.h>
+
+attribute_hidden
+const char *
+__inet_ntop_chk (int af, const void *src, char *dst,
+ socklen_t size, size_t dst_size)
+{
+ if (size > dst_size)
+ __chk_fail ();
+
+ return inet_ntop (af, src, dst, size);
+}
+libc_hidden_def (__inet_ntop_chk)
diff --git a/debug/tst-fortify.c b/debug/tst-fortify.c
index ae738ff10a305575..f9b97531749d363b 100644
--- a/debug/tst-fortify.c
+++ b/debug/tst-fortify.c
@@ -23,6 +23,7 @@
#include <assert.h>
#include <fcntl.h>
+#include <arpa/inet.h>
#include <limits.h>
#include <locale.h>
#include <obstack.h>
@@ -1832,6 +1833,26 @@ do_test (void)
# endif
#endif
+ struct in6_addr addr6 = {};
+ struct in_addr addr = {};
+ char addrstr6[INET6_ADDRSTRLEN];
+ char addrstr[INET_ADDRSTRLEN];
+
+ if (inet_ntop (AF_INET6, &addr6, addrstr6, sizeof (addrstr6)) == NULL)
+ FAIL ();
+ if (inet_ntop (AF_INET, &addr, addrstr, sizeof (addrstr)) == NULL)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ inet_ntop (AF_INET6, &addr6, buf, INET6_ADDRSTRLEN);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ inet_ntop (AF_INET, &addr, buf, INET_ADDRSTRLEN);
+ CHK_FAIL_END
+#endif
+
return ret;
}
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index d9e55a3c7f2db9f2..a02892f48a27454e 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -3,12 +3,18 @@
#include <inet/arpa/inet.h>
#ifndef _ISOMAC
+/* Declare functions with security checks.
+ This needs to be included unconditionally as these definition are needed even
+ when fortification is disabled in inet/arpa/inet.h. */
+#include <bits/inet-fortified-decl.h>
+
/* Variant of inet_aton which rejects trailing garbage. */
extern int __inet_aton_exact (const char *__cp, struct in_addr *__inp);
libc_hidden_proto (__inet_aton_exact)
extern __typeof (inet_ntop) __inet_ntop;
libc_hidden_proto (__inet_ntop)
+libc_hidden_proto (__inet_ntop_chk)
libc_hidden_proto (inet_pton)
extern __typeof (inet_pton) __inet_pton;
diff --git a/include/bits/inet-fortified-decl.h b/include/bits/inet-fortified-decl.h
new file mode 100644
index 0000000000000000..e6ad4d4663c61a0d
--- /dev/null
+++ b/include/bits/inet-fortified-decl.h
@@ -0,0 +1 @@
+#include <inet/bits/inet-fortified-decl.h>
diff --git a/include/bits/inet-fortified.h b/include/bits/inet-fortified.h
new file mode 100644
index 0000000000000000..abba7c57014c2a23
--- /dev/null
+++ b/include/bits/inet-fortified.h
@@ -0,0 +1 @@
+#include <inet/bits/inet-fortified.h>
diff --git a/inet/Makefile b/inet/Makefile
index cb97b45f0f9d223f..01208235c4f800bb 100644
--- a/inet/Makefile
+++ b/inet/Makefile
@@ -25,6 +25,8 @@ include ../Makeconfig
headers := \
$(wildcard arpa/*.h protocols/*.h) \
bits/in.h \
+ bits/inet-fortified-decl.h \
+ bits/inet-fortified.h \
ifaddrs.h \
netinet/ether.h \
netinet/icmp6.h \
diff --git a/inet/arpa/inet.h b/inet/arpa/inet.h
index c005340a8004dcaf..2b8eac147280306e 100644
--- a/inet/arpa/inet.h
+++ b/inet/arpa/inet.h
@@ -101,6 +101,11 @@ extern char *inet_nsap_ntoa (int __len, const unsigned char *__cp,
char *__buf) __THROW;
#endif
+#if __USE_FORTIFY_LEVEL > 0 && defined __fortify_function
+/* Include functions with security checks. */
+# include <bits/inet-fortified.h>
+#endif
+
__END_DECLS
#endif /* arpa/inet.h */
diff --git a/inet/bits/inet-fortified-decl.h b/inet/bits/inet-fortified-decl.h
new file mode 100644
index 0000000000000000..23e3cf4b2238c81a
--- /dev/null
+++ b/inet/bits/inet-fortified-decl.h
@@ -0,0 +1,35 @@
+/* Declarations of checking macros for inet functions.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _BITS_INET_FORTIFIED_DEC_H
+#define _BITS_INET_FORTIFIED_DEC_H 1
+
+#ifndef _ARPA_INET_H
+# error "Never include <bits/inet-fortified-decl.h> directly; use <arpa/inet.h> instead."
+#endif
+
+extern const char *__inet_ntop_chk (int, const void *, char *, socklen_t, size_t);
+
+extern const char *__REDIRECT_FORTIFY_NTH (__inet_ntop_alias,
+ (int, const void *, char *, socklen_t), inet_ntop);
+extern const char *__REDIRECT_NTH (__inet_ntop_chk_warn,
+ (int, const void *, char *, socklen_t, size_t), __inet_ntop_chk)
+ __warnattr ("inet_ntop called with bigger length than "
+ "size of destination buffer");
+
+#endif /* bits/inet-fortified-decl.h. */
diff --git a/inet/bits/inet-fortified.h b/inet/bits/inet-fortified.h
new file mode 100644
index 0000000000000000..af26f36ef6ae0533
--- /dev/null
+++ b/inet/bits/inet-fortified.h
@@ -0,0 +1,37 @@
+/* Checking macros for inet functions.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _BITS_INET_FORTIFIED_H
+#define _BITS_INET_FORTIFIED_H 1
+
+#ifndef _ARPA_INET_H
+# error "Never include <bits/inet-fortified.h> directly; use <arpa/inet.h> instead."
+#endif
+
+#include <bits/inet-fortified-decl.h>
+
+__fortify_function const char *
+__NTH (inet_ntop (int __af, const void * __restrict __src,
+ char *__restrict __dst, socklen_t __dst_size))
+{
+ return __glibc_fortify (inet_ntop, __dst_size, sizeof (char),
+ __glibc_objsize (__dst),
+ __af, __src, __dst, __dst_size);
+};
+
+#endif /* bits/inet-fortified.h. */
diff --git a/manual/maint.texi b/manual/maint.texi
index 04faa222e2bd2fc4..ce6a556c68925b49 100644
--- a/manual/maint.texi
+++ b/manual/maint.texi
@@ -303,6 +303,8 @@ The following functions and macros are fortified in @theglibc{}:
@item @code{getwd}
+@item @code{inet_ntop}
+
@item @code{longjmp}
@item @code{mbsnrtowcs}

471
glibc-RHEL-111115-4.patch Normal file
View File

@ -0,0 +1,471 @@
commit a71db81ed1353edd00ca2901d2fefd98c53209d3
Author: Aaron Merey <amerey@redhat.com>
Date: Thu Mar 20 11:07:05 2025 -0400
Prepare inet_pton to be fortified
Split inet_pton internals such as __inet_pton_length from the
inet_pton entry point.
This allows the internals to be built with fortification while
leaving the inet_pton entry point unchanged.
Co-authored-by: Frédéric Bérat <fberat@redhat.com>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/resolv/Makefile b/resolv/Makefile
index abff7fc0074e893b..05fb04edf1082690 100644
--- a/resolv/Makefile
+++ b/resolv/Makefile
@@ -38,6 +38,7 @@ routines := \
inet_addr \
inet_ntop \
inet_pton \
+ inet_pton_length \
ns_makecanon \
ns_name_compress \
ns_name_length_uncompressed \
@@ -73,6 +74,11 @@ routines := \
resolv_context \
# routines
+# Exclude fortified routines from being built with _FORTIFY_SOURCE
+routines_no_fortify += \
+ inet_pton \
+ # routines_no_fortify
+
tests = tst-aton tst-leaks tst-inet_ntop
tests-container = tst-leaks2
diff --git a/resolv/inet_pton.c b/resolv/inet_pton.c
index 96ca3e4f9e1790a0..70f6fa177582f2cc 100644
--- a/resolv/inet_pton.c
+++ b/resolv/inet_pton.c
@@ -33,33 +33,7 @@
*/
#include <arpa/inet.h>
-#include <arpa/nameser.h>
-#include <ctype.h>
-#include <errno.h>
-#include <netinet/in.h>
#include <resolv/resolv-internal.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-
-static int inet_pton4 (const char *src, const char *src_end, u_char *dst);
-static int inet_pton6 (const char *src, const char *src_end, u_char *dst);
-
-int
-__inet_pton_length (int af, const char *src, size_t srclen, void *dst)
-{
- switch (af)
- {
- case AF_INET:
- return inet_pton4 (src, src + srclen, dst);
- case AF_INET6:
- return inet_pton6 (src, src + srclen, dst);
- default:
- __set_errno (EAFNOSUPPORT);
- return -1;
- }
-}
-libc_hidden_def (__inet_pton_length)
/* Like __inet_pton_length, but use strlen (SRC) as the length of
SRC. */
@@ -71,164 +45,3 @@ __inet_pton (int af, const char *src, void *dst)
libc_hidden_def (__inet_pton)
weak_alias (__inet_pton, inet_pton)
libc_hidden_weak (inet_pton)
-
-/* Like inet_aton but without all the hexadecimal, octal and shorthand
- (and trailing garbage is not ignored). Return 1 if SRC is a valid
- dotted quad, else 0. This function does not touch DST unless it's
- returning 1.
- Author: Paul Vixie, 1996. */
-static int
-inet_pton4 (const char *src, const char *end, unsigned char *dst)
-{
- int saw_digit, octets, ch;
- unsigned char tmp[NS_INADDRSZ], *tp;
-
- saw_digit = 0;
- octets = 0;
- *(tp = tmp) = 0;
- while (src < end)
- {
- ch = *src++;
- if (ch >= '0' && ch <= '9')
- {
- unsigned int new = *tp * 10 + (ch - '0');
-
- if (saw_digit && *tp == 0)
- return 0;
- if (new > 255)
- return 0;
- *tp = new;
- if (! saw_digit)
- {
- if (++octets > 4)
- return 0;
- saw_digit = 1;
- }
- }
- else if (ch == '.' && saw_digit)
- {
- if (octets == 4)
- return 0;
- *++tp = 0;
- saw_digit = 0;
- }
- else
- return 0;
- }
- if (octets < 4)
- return 0;
- memcpy (dst, tmp, NS_INADDRSZ);
- return 1;
-}
-
-/* Return the value of CH as a hexadecimal digit, or -1 if it is a
- different type of character. */
-static int
-hex_digit_value (char ch)
-{
- if ('0' <= ch && ch <= '9')
- return ch - '0';
- if ('a' <= ch && ch <= 'f')
- return ch - 'a' + 10;
- if ('A' <= ch && ch <= 'F')
- return ch - 'A' + 10;
- return -1;
-}
-
-/* Convert presentation-level IPv6 address to network order binary
- form. Return 1 if SRC is a valid [RFC1884 2.2] address, else 0.
- This function does not touch DST unless it's returning 1.
- Author: Paul Vixie, 1996. Inspired by Mark Andrews. */
-static int
-inet_pton6 (const char *src, const char *src_endp, unsigned char *dst)
-{
- unsigned char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
- const char *curtok;
- int ch;
- size_t xdigits_seen; /* Number of hex digits since colon. */
- unsigned int val;
-
- tp = memset (tmp, '\0', NS_IN6ADDRSZ);
- endp = tp + NS_IN6ADDRSZ;
- colonp = NULL;
-
- /* Leading :: requires some special handling. */
- if (src == src_endp)
- return 0;
- if (*src == ':')
- {
- ++src;
- if (src == src_endp || *src != ':')
- return 0;
- }
-
- curtok = src;
- xdigits_seen = 0;
- val = 0;
- while (src < src_endp)
- {
- ch = *src++;
- int digit = hex_digit_value (ch);
- if (digit >= 0)
- {
- if (xdigits_seen == 4)
- return 0;
- val <<= 4;
- val |= digit;
- if (val > 0xffff)
- return 0;
- ++xdigits_seen;
- continue;
- }
- if (ch == ':')
- {
- curtok = src;
- if (xdigits_seen == 0)
- {
- if (colonp)
- return 0;
- colonp = tp;
- continue;
- }
- else if (src == src_endp)
- return 0;
- if (tp + NS_INT16SZ > endp)
- return 0;
- *tp++ = (unsigned char) (val >> 8) & 0xff;
- *tp++ = (unsigned char) val & 0xff;
- xdigits_seen = 0;
- val = 0;
- continue;
- }
- if (ch == '.' && ((tp + NS_INADDRSZ) <= endp)
- && inet_pton4 (curtok, src_endp, tp) > 0)
- {
- tp += NS_INADDRSZ;
- xdigits_seen = 0;
- break; /* '\0' was seen by inet_pton4. */
- }
- return 0;
- }
- if (xdigits_seen > 0)
- {
- if (tp + NS_INT16SZ > endp)
- return 0;
- *tp++ = (unsigned char) (val >> 8) & 0xff;
- *tp++ = (unsigned char) val & 0xff;
- }
- if (colonp != NULL)
- {
- /* Replace :: with zeros. */
- if (tp == endp)
- /* :: would expand to a zero-width field. */
- return 0;
- size_t n = tp - colonp;
- memmove (endp - n, colonp, n);
- memset (colonp, 0, endp - n - colonp);
- tp = endp;
- }
- if (tp != endp)
- return 0;
- memcpy (dst, tmp, NS_IN6ADDRSZ);
- return 1;
-}
diff --git a/resolv/inet_pton_length.c b/resolv/inet_pton_length.c
new file mode 100644
index 0000000000000000..c3614074a47140c1
--- /dev/null
+++ b/resolv/inet_pton_length.c
@@ -0,0 +1,223 @@
+/* Copyright (C) 1996-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/*
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <ctype.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <resolv/resolv-internal.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+static int inet_pton4 (const char *src, const char *src_end, u_char *dst);
+static int inet_pton6 (const char *src, const char *src_end, u_char *dst);
+
+int
+__inet_pton_length (int af, const char *src, size_t srclen, void *dst)
+{
+ switch (af)
+ {
+ case AF_INET:
+ return inet_pton4 (src, src + srclen, dst);
+ case AF_INET6:
+ return inet_pton6 (src, src + srclen, dst);
+ default:
+ __set_errno (EAFNOSUPPORT);
+ return -1;
+ }
+}
+libc_hidden_def (__inet_pton_length)
+
+/* Like inet_aton but without all the hexadecimal, octal and shorthand
+ (and trailing garbage is not ignored). Return 1 if SRC is a valid
+ dotted quad, else 0. This function does not touch DST unless it's
+ returning 1.
+ Author: Paul Vixie, 1996. */
+static int
+inet_pton4 (const char *src, const char *end, unsigned char *dst)
+{
+ int saw_digit, octets, ch;
+ unsigned char tmp[NS_INADDRSZ], *tp;
+
+ saw_digit = 0;
+ octets = 0;
+ *(tp = tmp) = 0;
+ while (src < end)
+ {
+ ch = *src++;
+ if (ch >= '0' && ch <= '9')
+ {
+ unsigned int new = *tp * 10 + (ch - '0');
+
+ if (saw_digit && *tp == 0)
+ return 0;
+ if (new > 255)
+ return 0;
+ *tp = new;
+ if (! saw_digit)
+ {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ }
+ else if (ch == '.' && saw_digit)
+ {
+ if (octets == 4)
+ return 0;
+ *++tp = 0;
+ saw_digit = 0;
+ }
+ else
+ return 0;
+ }
+ if (octets < 4)
+ return 0;
+ memcpy (dst, tmp, NS_INADDRSZ);
+ return 1;
+}
+
+/* Return the value of CH as a hexadecimal digit, or -1 if it is a
+ different type of character. */
+static int
+hex_digit_value (char ch)
+{
+ if ('0' <= ch && ch <= '9')
+ return ch - '0';
+ if ('a' <= ch && ch <= 'f')
+ return ch - 'a' + 10;
+ if ('A' <= ch && ch <= 'F')
+ return ch - 'A' + 10;
+ return -1;
+}
+
+/* Convert presentation-level IPv6 address to network order binary
+ form. Return 1 if SRC is a valid [RFC1884 2.2] address, else 0.
+ This function does not touch DST unless it's returning 1.
+ Author: Paul Vixie, 1996. Inspired by Mark Andrews. */
+static int
+inet_pton6 (const char *src, const char *src_endp, unsigned char *dst)
+{
+ unsigned char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
+ const char *curtok;
+ int ch;
+ size_t xdigits_seen; /* Number of hex digits since colon. */
+ unsigned int val;
+
+ tp = memset (tmp, '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+
+ /* Leading :: requires some special handling. */
+ if (src == src_endp)
+ return 0;
+ if (*src == ':')
+ {
+ ++src;
+ if (src == src_endp || *src != ':')
+ return 0;
+ }
+
+ curtok = src;
+ xdigits_seen = 0;
+ val = 0;
+ while (src < src_endp)
+ {
+ ch = *src++;
+ int digit = hex_digit_value (ch);
+ if (digit >= 0)
+ {
+ if (xdigits_seen == 4)
+ return 0;
+ val <<= 4;
+ val |= digit;
+ if (val > 0xffff)
+ return 0;
+ ++xdigits_seen;
+ continue;
+ }
+ if (ch == ':')
+ {
+ curtok = src;
+ if (xdigits_seen == 0)
+ {
+ if (colonp)
+ return 0;
+ colonp = tp;
+ continue;
+ }
+ else if (src == src_endp)
+ return 0;
+ if (tp + NS_INT16SZ > endp)
+ return 0;
+ *tp++ = (unsigned char) (val >> 8) & 0xff;
+ *tp++ = (unsigned char) val & 0xff;
+ xdigits_seen = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp)
+ && inet_pton4 (curtok, src_endp, tp) > 0)
+ {
+ tp += NS_INADDRSZ;
+ xdigits_seen = 0;
+ break; /* '\0' was seen by inet_pton4. */
+ }
+ return 0;
+ }
+ if (xdigits_seen > 0)
+ {
+ if (tp + NS_INT16SZ > endp)
+ return 0;
+ *tp++ = (unsigned char) (val >> 8) & 0xff;
+ *tp++ = (unsigned char) val & 0xff;
+ }
+ if (colonp != NULL)
+ {
+ /* Replace :: with zeros. */
+ if (tp == endp)
+ /* :: would expand to a zero-width field. */
+ return 0;
+ size_t n = tp - colonp;
+ memmove (endp - n, colonp, n);
+ memset (colonp, 0, endp - n - colonp);
+ tp = endp;
+ }
+ if (tp != endp)
+ return 0;
+ memcpy (dst, tmp, NS_IN6ADDRSZ);
+ return 1;
+}

216
glibc-RHEL-111115-5.patch Normal file
View File

@ -0,0 +1,216 @@
Partial backport (without ABI changes, using libc_nonshared.a instead)
of:
commit e3a6e85d67f1a48dec3e2557a83d6ce1544a58cb
Author: Aaron Merey <amerey@redhat.com>
Date: Thu Mar 20 13:13:33 2025 -0400
Add _FORTIFY_SOURCE support for inet_pton
Add function __inet_pton_chk which calls __chk_fail when the size of
argument dst is too small. inet_pton is redirected to __inet_pton_chk
or __inet_pton_chk_warn when _FORTIFY_SOURCE is > 0.
Also add tests to debug/tst-fortify.c, update the abilist (Dropped) with
__inet_pton_chk and mention inet_pton fortification in maint.texi.
Co-authored-by: Frédéric Bérat <fberat@redhat.com>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Conflicts:
debug/inet_pton_chk.c
(attribute_hidden added, use inet_pton instead of __inet_pton)
debug/Makefile
(Routine added to static-only-routines)
debug/Versions
(Dropped)
inet/bits/inet-fortified.h
(removed attribute_overloadable and clang specific handling)
sysdeps/mach/hurd/i386/libc.abilist
sysdeps/mach/hurd/x86_64/libc.abilist
sysdeps/unix/sysv/linux/aarch64/libc.abilist
sysdeps/unix/sysv/linux/alpha/libc.abilist
sysdeps/unix/sysv/linux/arc/libc.abilist
sysdeps/unix/sysv/linux/arm/be/libc.abilist
sysdeps/unix/sysv/linux/arm/le/libc.abilist
sysdeps/unix/sysv/linux/csky/libc.abilist
sysdeps/unix/sysv/linux/hppa/libc.abilist
sysdeps/unix/sysv/linux/i386/libc.abilist
sysdeps/unix/sysv/linux/loongarch/lp64/libc.abilist
sysdeps/unix/sysv/linux/m68k/coldfire/libc.abilist
sysdeps/unix/sysv/linux/m68k/m680x0/libc.abilist
sysdeps/unix/sysv/linux/microblaze/be/libc.abilist
sysdeps/unix/sysv/linux/microblaze/le/libc.abilist
sysdeps/unix/sysv/linux/mips/mips32/fpu/libc.abilist
sysdeps/unix/sysv/linux/mips/mips32/nofpu/libc.abilist
sysdeps/unix/sysv/linux/mips/mips64/n32/libc.abilist
sysdeps/unix/sysv/linux/mips/mips64/n64/libc.abilist
sysdeps/unix/sysv/linux/or1k/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc32/fpu/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libc.abilist
sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libc.abilist
sysdeps/unix/sysv/linux/riscv/rv32/libc.abilist
sysdeps/unix/sysv/linux/riscv/rv64/libc.abilist
sysdeps/unix/sysv/linux/s390/s390-32/libc.abilist
sysdeps/unix/sysv/linux/s390/s390-64/libc.abilist
sysdeps/unix/sysv/linux/sh/be/libc.abilist
sysdeps/unix/sysv/linux/sh/le/libc.abilist
sysdeps/unix/sysv/linux/sparc/sparc32/libc.abilist
sysdeps/unix/sysv/linux/sparc/sparc64/libc.abilist
sysdeps/unix/sysv/linux/x86_64/64/libc.abilist
sysdeps/unix/sysv/linux/x86_64/x32/libc.abilist
(Dropped)
diff --git a/debug/Makefile b/debug/Makefile
index a2d236e28eda8b47..5a7295678cb1eed1 100644
--- a/debug/Makefile
+++ b/debug/Makefile
@@ -122,6 +122,7 @@ routines = \
static-only-routines := \
inet_ntop_chk \
+ inet_pton_chk \
stack_chk_fail_local \
# static-only-routines
# Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local
diff --git a/debug/inet_pton_chk.c b/debug/inet_pton_chk.c
new file mode 100644
index 0000000000000000..feca3bff5a4ded2f
--- /dev/null
+++ b/debug/inet_pton_chk.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <arpa/inet.h>
+#include <stdio.h>
+
+attribute_hidden
+int
+__inet_pton_chk (int af, const char *src, void *dst, size_t dst_size)
+{
+ if ((af == AF_INET && dst_size < 4)
+ || (af == AF_INET6 && dst_size < 16))
+ __chk_fail ();
+
+ return inet_pton (af, src, dst);
+}
+libc_hidden_def (__inet_pton_chk)
diff --git a/debug/tst-fortify.c b/debug/tst-fortify.c
index f9b97531749d363b..0e44594a181fdba5 100644
--- a/debug/tst-fortify.c
+++ b/debug/tst-fortify.c
@@ -1853,6 +1853,30 @@ do_test (void)
CHK_FAIL_END
#endif
+ const char *ipv4str = "127.0.0.1";
+ const char *ipv6str = "::1";
+
+ if (inet_pton (AF_INET, ipv4str, (void *) &addr) != 1)
+ FAIL ();
+ if (inet_pton (AF_INET6, ipv6str, (void *) &addr6) != 1)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ char smallbuf[2];
+
+ CHK_FAIL_START
+ inet_pton (AF_INET, ipv4str, (void *) smallbuf);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ inet_pton (AF_INET6, ipv6str, (void *) smallbuf);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ inet_pton (AF_INET6, ipv6str, (void *) &addr);
+ CHK_FAIL_END
+#endif
+
return ret;
}
diff --git a/include/arpa/inet.h b/include/arpa/inet.h
index a02892f48a27454e..3db8f1a96fdbd6fd 100644
--- a/include/arpa/inet.h
+++ b/include/arpa/inet.h
@@ -19,6 +19,8 @@ libc_hidden_proto (__inet_ntop_chk)
libc_hidden_proto (inet_pton)
extern __typeof (inet_pton) __inet_pton;
libc_hidden_proto (__inet_pton)
+libc_hidden_proto (__inet_pton_chk)
+
extern __typeof (inet_makeaddr) __inet_makeaddr;
libc_hidden_proto (__inet_makeaddr)
libc_hidden_proto (inet_netof)
diff --git a/inet/bits/inet-fortified-decl.h b/inet/bits/inet-fortified-decl.h
index 23e3cf4b2238c81a..748a119f149d790b 100644
--- a/inet/bits/inet-fortified-decl.h
+++ b/inet/bits/inet-fortified-decl.h
@@ -32,4 +32,11 @@ extern const char *__REDIRECT_NTH (__inet_ntop_chk_warn,
__warnattr ("inet_ntop called with bigger length than "
"size of destination buffer");
+extern int __inet_pton_chk (int, const char *, void *, size_t);
+
+extern int __REDIRECT_FORTIFY_NTH (__inet_pton_alias,
+ (int, const char *, void *), inet_pton);
+extern int __REDIRECT_NTH (__inet_pton_chk_warn,
+ (int, const char *, void *, size_t), __inet_pton_chk)
+ __warnattr ("inet_pton called with a destination buffer size too small");
#endif /* bits/inet-fortified-decl.h. */
diff --git a/inet/bits/inet-fortified.h b/inet/bits/inet-fortified.h
index af26f36ef6ae0533..8420a4b7fb41086f 100644
--- a/inet/bits/inet-fortified.h
+++ b/inet/bits/inet-fortified.h
@@ -34,4 +34,21 @@ __NTH (inet_ntop (int __af, const void * __restrict __src,
__af, __src, __dst, __dst_size);
};
+__fortify_function int
+__NTH (inet_pton (int __af, const char *__restrict __src,
+ void * __restrict __dst))
+{
+ size_t sz = 0;
+ if (__af == AF_INET)
+ sz = sizeof (struct in_addr);
+ else if (__af == AF_INET6)
+ sz = sizeof (struct in6_addr);
+ else
+ return __inet_pton_alias (__af, __src, __dst);
+
+ return __glibc_fortify (inet_pton, sz, sizeof (char),
+ __glibc_objsize (__dst),
+ __af, __src, __dst);
+};
+
#endif /* bits/inet-fortified.h. */
diff --git a/manual/maint.texi b/manual/maint.texi
index ce6a556c68925b49..b6ee5b6e3bdf768c 100644
--- a/manual/maint.texi
+++ b/manual/maint.texi
@@ -305,6 +305,8 @@ The following functions and macros are fortified in @theglibc{}:
@item @code{inet_ntop}
+@item @code{inet_pton}
+
@item @code{longjmp}
@item @code{mbsnrtowcs}

34
glibc-RHEL-111115-6.patch Normal file
View File

@ -0,0 +1,34 @@
commit 87afbd7a1ad9c1dd116921817fa97198171045db
Author: Sam James <sam@gentoo.org>
Date: Mon Jul 28 21:55:30 2025 +0100
inet-fortified: fix namespace violation (bug 33227)
We need to use __sz, not sz, as we do elsewhere.
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/inet/bits/inet-fortified.h b/inet/bits/inet-fortified.h
index 8420a4b7fb41086f..5d16b1f871c49e6f 100644
--- a/inet/bits/inet-fortified.h
+++ b/inet/bits/inet-fortified.h
@@ -38,15 +38,15 @@ __fortify_function int
__NTH (inet_pton (int __af, const char *__restrict __src,
void * __restrict __dst))
{
- size_t sz = 0;
+ size_t __sz = 0;
if (__af == AF_INET)
- sz = sizeof (struct in_addr);
+ __sz = sizeof (struct in_addr);
else if (__af == AF_INET6)
- sz = sizeof (struct in6_addr);
+ __sz = sizeof (struct in6_addr);
else
return __inet_pton_alias (__af, __src, __dst);
- return __glibc_fortify (inet_pton, sz, sizeof (char),
+ return __glibc_fortify (inet_pton, __sz, sizeof (char),
__glibc_objsize (__dst),
__af, __src, __dst);
};

230
glibc-RHEL-111117-1.patch Normal file
View File

@ -0,0 +1,230 @@
commit f6ba993e0cda0ca5554fd47b00e6a87be5fdf05e
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu Jul 25 15:41:44 2024 -0300
stdlib: Allow concurrent exit (BZ 31997)
Even if C/POSIX standard states that exit is not formally thread-unsafe,
calling it more than once is UB. The glibc already supports
it for the single-thread, and both elf/nodelete2.c and tst-rseq-disable.c
call exit from a DSO destructor (which is called by _dl_fini, registered
at program startup with __cxa_atexit).
However, there are still race issues when it is called more than once
concurrently by multiple threads. A recent Rust PR triggered this
issue [1], which resulted in an Austin Group ask for clarification [2].
Besides it, there is a discussion to make concurrent calling not UB [3],
with a defined semantic where any remaining callers block until the first
call to exit has finished (reentrant calls, leaving through longjmp, and
exceptions are still undefined).
For glibc, at least reentrant calls are required to be supported to avoid
changing the current behaviour. This requires locking using a recursive
lock, where any exit called by atexit() handlers resumes at the point of
the current handler (thus avoiding calling the current handler multiple
times).
Checked on x86_64-linux-gnu and aarch64-linux-gnu.
[1] https://github.com/rust-lang/rust/issues/126600
[2] https://austingroupbugs.net/view.php?id=1845
[3] https://www.openwall.com/lists/libc-coord/2024/07/24/4
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 12f8820fd0668039..e15d154885fadc47 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -273,6 +273,7 @@ tests := \
tst-bsearch \
tst-bz20544 \
tst-canon-bz26341 \
+ tst-concurrent-exit \
tst-cxa_atexit \
tst-environ \
tst-environ-change-1 \
diff --git a/stdlib/exit.c b/stdlib/exit.c
index 5166c78044335398..bbaf1388068a1cea 100644
--- a/stdlib/exit.c
+++ b/stdlib/exit.c
@@ -132,9 +132,17 @@ __run_exit_handlers (int status, struct exit_function_list **listp,
}
+/* The lock handles concurrent exit(), even though the C/POSIX standard states
+ that calling exit() more than once is UB. The recursive lock allows
+ atexit() handlers or destructors to call exit() itself. In this case, the
+ handler list execution will resume at the point of the current handler. */
+__libc_lock_define_initialized_recursive (static, __exit_lock)
+
void
exit (int status)
{
+ /* The exit should never return, so there is no need to unlock it. */
+ __libc_lock_lock_recursive (__exit_lock);
__run_exit_handlers (status, &__exit_funcs, true, true);
}
libc_hidden_def (exit)
diff --git a/stdlib/tst-concurrent-exit.c b/stdlib/tst-concurrent-exit.c
new file mode 100644
index 0000000000000000..1141130f87fde20f
--- /dev/null
+++ b/stdlib/tst-concurrent-exit.c
@@ -0,0 +1,157 @@
+/* Check if exit can be called concurrently by multiple threads.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <stdio.h>
+#include <support/xunistd.h>
+#include <string.h>
+
+#define MAX_atexit 32
+
+static pthread_barrier_t barrier;
+
+static void *
+tf (void *closure)
+{
+ xpthread_barrier_wait (&barrier);
+ exit (0);
+
+ return NULL;
+}
+
+static const char expected[] = "00000000000000000000000003021121130211";
+static char crumbs[sizeof (expected)];
+static int next_slot = 0;
+
+static void
+exit_with_flush (int code)
+{
+ fflush (stdout);
+ /* glibc allows recursive exit, the atexit handlers execution will be
+ resumed from the where the previous exit was interrupted. */
+ exit (code);
+}
+
+/* Take some time, so another thread potentially issue exit. */
+#define SETUP_NANOSLEEP \
+ if (nanosleep (&(struct timespec) { .tv_sec = 0, .tv_nsec = 1000L }, \
+ NULL) != 0) \
+ FAIL_EXIT1 ("nanosleep: %m")
+
+static void
+fn0 (void)
+{
+ crumbs[next_slot++] = '0';
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn1 (void)
+{
+ crumbs[next_slot++] = '1';
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn2 (void)
+{
+ crumbs[next_slot++] = '2';
+ atexit (fn1);
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn3 (void)
+{
+ crumbs[next_slot++] = '3';
+ atexit (fn2);
+ atexit (fn0);
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn_final (void)
+{
+ TEST_COMPARE_STRING (crumbs, expected);
+ exit_with_flush (0);
+}
+
+_Noreturn static void
+child (void)
+{
+ enum { nthreads = 8 };
+
+ xpthread_barrier_init (&barrier, NULL, nthreads + 1);
+
+ pthread_t thr[nthreads];
+ for (int i = 0; i < nthreads; i++)
+ thr[i] = xpthread_create (NULL, tf, NULL);
+
+ xpthread_barrier_wait (&barrier);
+
+ for (int i = 0; i < nthreads; i++)
+ {
+ pthread_join (thr[i], NULL);
+ /* It should not be reached, it means that thread did not exit for
+ some reason. */
+ support_record_failure ();
+ }
+
+ exit (2);
+}
+
+static int
+do_test (void)
+{
+ /* Register a large number of handler that will trigger a heap allocation
+ for the handle state. On exit, each block will be freed after the
+ handle is processed. */
+ int slots_remaining = MAX_atexit;
+
+ /* Register this first so it can verify expected order of the rest. */
+ atexit (fn_final); --slots_remaining;
+
+ TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (atexit (fn3) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (atexit (fn2) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (atexit (fn3) == 0); --slots_remaining;
+
+ while (slots_remaining > 0)
+ {
+ TEST_VERIFY_EXIT (atexit (fn0) == 0); --slots_remaining;
+ }
+
+ pid_t pid = xfork ();
+ if (pid != 0)
+ {
+ int status;
+ xwaitpid (pid, &status, 0);
+ TEST_VERIFY (WIFEXITED (status));
+ }
+ else
+ child ();
+
+ return 0;
+}
+
+#include <support/test-driver.c>

427
glibc-RHEL-111117-2.patch Normal file
View File

@ -0,0 +1,427 @@
commit c6af8a9a3ce137a9704825d173be22a2b2d9cb49
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Mon Aug 5 11:27:35 2024 -0300
stdlib: Allow concurrent quick_exit (BZ 31997)
As for exit, also allows concurrent quick_exit to avoid race
conditions when it is called concurrently. Since it uses the same
internal function as exit, the __exit_lock lock is moved to
__run_exit_handlers. It also solves a potential race condition when
calling exit and quick_exit concurrently.
The test case 'expected' is expanded to a value larger than the
minimum required by C/POSIX (32 entries) so at_quick_exit() will
require libc to allocate a new block. This makes the test more likely to
trigger concurrent issues (through free() at __run_exit_handlers)
if quick_exit() interacts with the at_quick_exit list concurrently.
This is also the latest interpretation of the Austin Ticket [1].
Checked on x86_64-linux-gnu.
[1] https://austingroupbugs.net/view.php?id=1845
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/stdlib/Makefile b/stdlib/Makefile
index e15d154885fadc47..fe663f3bb8ee1e00 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -274,6 +274,7 @@ tests := \
tst-bz20544 \
tst-canon-bz26341 \
tst-concurrent-exit \
+ tst-concurrent-quick_exit \
tst-cxa_atexit \
tst-environ \
tst-environ-change-1 \
diff --git a/stdlib/exit.c b/stdlib/exit.c
index bbaf1388068a1cea..8d7e2e53d0ee93ae 100644
--- a/stdlib/exit.c
+++ b/stdlib/exit.c
@@ -28,6 +28,13 @@
__exit_funcs_lock is declared. */
bool __exit_funcs_done = false;
+/* The lock handles concurrent exit() and quick_exit(), even though the
+ C/POSIX standard states that calling exit() more than once is UB. The
+ recursive lock allows atexit() handlers or destructors to call exit()
+ itself. In this case, the handler list execution will resume at the
+ point of the current handler. */
+__libc_lock_define_initialized_recursive (static, __exit_lock)
+
/* Call all functions registered with `atexit' and `on_exit',
in the reverse of the order in which they were registered
perform stdio cleanup, and terminate program execution with STATUS. */
@@ -36,6 +43,9 @@ attribute_hidden
__run_exit_handlers (int status, struct exit_function_list **listp,
bool run_list_atexit, bool run_dtors)
{
+ /* The exit should never return, so there is no need to unlock it. */
+ __libc_lock_lock_recursive (__exit_lock);
+
/* First, call the TLS destructors. */
if (run_dtors)
call_function_static_weak (__call_tls_dtors);
@@ -132,17 +142,9 @@ __run_exit_handlers (int status, struct exit_function_list **listp,
}
-/* The lock handles concurrent exit(), even though the C/POSIX standard states
- that calling exit() more than once is UB. The recursive lock allows
- atexit() handlers or destructors to call exit() itself. In this case, the
- handler list execution will resume at the point of the current handler. */
-__libc_lock_define_initialized_recursive (static, __exit_lock)
-
void
exit (int status)
{
- /* The exit should never return, so there is no need to unlock it. */
- __libc_lock_lock_recursive (__exit_lock);
__run_exit_handlers (status, &__exit_funcs, true, true);
}
libc_hidden_def (exit)
diff --git a/stdlib/tst-concurrent-exit-skeleton.c b/stdlib/tst-concurrent-exit-skeleton.c
new file mode 100644
index 0000000000000000..cfd5140466e1a730
--- /dev/null
+++ b/stdlib/tst-concurrent-exit-skeleton.c
@@ -0,0 +1,160 @@
+/* Check if exit/quick_exit can be called concurrently by multiple threads.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <stdio.h>
+#include <support/xunistd.h>
+#include <string.h>
+
+/* A value larger than the minimum required by C/POSIX (32), to trigger a
+ new block memory allocation. */
+#define MAX_atexit 64
+
+static pthread_barrier_t barrier;
+
+static void *
+tf (void *closure)
+{
+ xpthread_barrier_wait (&barrier);
+ EXIT (0);
+
+ return NULL;
+}
+
+static const char expected[] = "00000000000000000000000000000000000"
+ "00000000000000000000003021121130211";
+static char crumbs[sizeof (expected)];
+static int next_slot = 0;
+
+static void
+exit_with_flush (int code)
+{
+ fflush (stdout);
+ /* glibc allows recursive EXIT, the ATEXIT handlers execution will be
+ resumed from the where the previous EXIT was interrupted. */
+ EXIT (code);
+}
+
+/* Take some time, so another thread potentially issue EXIT. */
+#define SETUP_NANOSLEEP \
+ if (nanosleep (&(struct timespec) { .tv_sec = 0, .tv_nsec = 1000L }, \
+ NULL) != 0) \
+ FAIL_EXIT1 ("nanosleep: %m")
+
+static void
+fn0 (void)
+{
+ crumbs[next_slot++] = '0';
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn1 (void)
+{
+ crumbs[next_slot++] = '1';
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn2 (void)
+{
+ crumbs[next_slot++] = '2';
+ ATEXIT (fn1);
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn3 (void)
+{
+ crumbs[next_slot++] = '3';
+ ATEXIT (fn2);
+ ATEXIT (fn0);
+ SETUP_NANOSLEEP;
+}
+
+static void
+fn_final (void)
+{
+ TEST_COMPARE_STRING (crumbs, expected);
+ exit_with_flush (0);
+}
+
+_Noreturn static void
+child (void)
+{
+ enum { nthreads = 8 };
+
+ xpthread_barrier_init (&barrier, NULL, nthreads + 1);
+
+ pthread_t thr[nthreads];
+ for (int i = 0; i < nthreads; i++)
+ thr[i] = xpthread_create (NULL, tf, NULL);
+
+ xpthread_barrier_wait (&barrier);
+
+ for (int i = 0; i < nthreads; i++)
+ {
+ pthread_join (thr[i], NULL);
+ /* It should not be reached, it means that thread did not exit for
+ some reason. */
+ support_record_failure ();
+ }
+
+ EXIT (2);
+}
+
+static int
+do_test (void)
+{
+ /* Register a large number of handler that will trigger a heap allocation
+ for the handle state. On EXIT, each block will be freed after the
+ handle is processed. */
+ int slots_remaining = MAX_atexit;
+
+ /* Register this first so it can verify expected order of the rest. */
+ ATEXIT (fn_final); --slots_remaining;
+
+ TEST_VERIFY_EXIT (ATEXIT (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (ATEXIT (fn3) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (ATEXIT (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (ATEXIT (fn2) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (ATEXIT (fn1) == 0); --slots_remaining;
+ TEST_VERIFY_EXIT (ATEXIT (fn3) == 0); --slots_remaining;
+
+ while (slots_remaining > 0)
+ {
+ TEST_VERIFY_EXIT (ATEXIT (fn0) == 0); --slots_remaining;
+ }
+
+ pid_t pid = xfork ();
+ if (pid != 0)
+ {
+ int status;
+ xwaitpid (pid, &status, 0);
+ TEST_VERIFY (WIFEXITED (status));
+ }
+ else
+ child ();
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdlib/tst-concurrent-exit.c b/stdlib/tst-concurrent-exit.c
index 1141130f87fde20f..421c39d63126246d 100644
--- a/stdlib/tst-concurrent-exit.c
+++ b/stdlib/tst-concurrent-exit.c
@@ -16,142 +16,7 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <array_length.h>
-#include <stdlib.h>
-#include <support/check.h>
-#include <support/xthread.h>
-#include <stdio.h>
-#include <support/xunistd.h>
-#include <string.h>
+#define EXIT(__r) exit (__r)
+#define ATEXIT(__f) atexit (__f)
-#define MAX_atexit 32
-
-static pthread_barrier_t barrier;
-
-static void *
-tf (void *closure)
-{
- xpthread_barrier_wait (&barrier);
- exit (0);
-
- return NULL;
-}
-
-static const char expected[] = "00000000000000000000000003021121130211";
-static char crumbs[sizeof (expected)];
-static int next_slot = 0;
-
-static void
-exit_with_flush (int code)
-{
- fflush (stdout);
- /* glibc allows recursive exit, the atexit handlers execution will be
- resumed from the where the previous exit was interrupted. */
- exit (code);
-}
-
-/* Take some time, so another thread potentially issue exit. */
-#define SETUP_NANOSLEEP \
- if (nanosleep (&(struct timespec) { .tv_sec = 0, .tv_nsec = 1000L }, \
- NULL) != 0) \
- FAIL_EXIT1 ("nanosleep: %m")
-
-static void
-fn0 (void)
-{
- crumbs[next_slot++] = '0';
- SETUP_NANOSLEEP;
-}
-
-static void
-fn1 (void)
-{
- crumbs[next_slot++] = '1';
- SETUP_NANOSLEEP;
-}
-
-static void
-fn2 (void)
-{
- crumbs[next_slot++] = '2';
- atexit (fn1);
- SETUP_NANOSLEEP;
-}
-
-static void
-fn3 (void)
-{
- crumbs[next_slot++] = '3';
- atexit (fn2);
- atexit (fn0);
- SETUP_NANOSLEEP;
-}
-
-static void
-fn_final (void)
-{
- TEST_COMPARE_STRING (crumbs, expected);
- exit_with_flush (0);
-}
-
-_Noreturn static void
-child (void)
-{
- enum { nthreads = 8 };
-
- xpthread_barrier_init (&barrier, NULL, nthreads + 1);
-
- pthread_t thr[nthreads];
- for (int i = 0; i < nthreads; i++)
- thr[i] = xpthread_create (NULL, tf, NULL);
-
- xpthread_barrier_wait (&barrier);
-
- for (int i = 0; i < nthreads; i++)
- {
- pthread_join (thr[i], NULL);
- /* It should not be reached, it means that thread did not exit for
- some reason. */
- support_record_failure ();
- }
-
- exit (2);
-}
-
-static int
-do_test (void)
-{
- /* Register a large number of handler that will trigger a heap allocation
- for the handle state. On exit, each block will be freed after the
- handle is processed. */
- int slots_remaining = MAX_atexit;
-
- /* Register this first so it can verify expected order of the rest. */
- atexit (fn_final); --slots_remaining;
-
- TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
- TEST_VERIFY_EXIT (atexit (fn3) == 0); --slots_remaining;
- TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
- TEST_VERIFY_EXIT (atexit (fn2) == 0); --slots_remaining;
- TEST_VERIFY_EXIT (atexit (fn1) == 0); --slots_remaining;
- TEST_VERIFY_EXIT (atexit (fn3) == 0); --slots_remaining;
-
- while (slots_remaining > 0)
- {
- TEST_VERIFY_EXIT (atexit (fn0) == 0); --slots_remaining;
- }
-
- pid_t pid = xfork ();
- if (pid != 0)
- {
- int status;
- xwaitpid (pid, &status, 0);
- TEST_VERIFY (WIFEXITED (status));
- }
- else
- child ();
-
- return 0;
-}
-
-#include <support/test-driver.c>
+#include "tst-concurrent-exit-skeleton.c"
diff --git a/stdlib/tst-concurrent-quick_exit.c b/stdlib/tst-concurrent-quick_exit.c
new file mode 100644
index 0000000000000000..3f321668d6b8d536
--- /dev/null
+++ b/stdlib/tst-concurrent-quick_exit.c
@@ -0,0 +1,22 @@
+/* Check if quick_exit can be called concurrently by multiple threads.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define EXIT(__r) quick_exit (__r)
+#define ATEXIT(__f) at_quick_exit (__f)
+
+#include "tst-concurrent-exit-skeleton.c"

75
glibc-RHEL-111120-1.patch Normal file
View File

@ -0,0 +1,75 @@
commit a4a12af5abe22d63fbebf0a219d8d13eff6db20c
Author: Carlos O'Donell <carlos@redhat.com>
Date: Thu Jun 8 07:30:33 2023 -0400
dirent: Reformat Makefile.
Reflow and sort Makefile.
Code generation changes present due to link order changes.
No regressions on x86_64 and i686.
diff --git a/dirent/Makefile b/dirent/Makefile
index 92587cab9a85203f..556f759f653349bd 100644
--- a/dirent/Makefile
+++ b/dirent/Makefile
@@ -22,16 +22,48 @@ subdir := dirent
include ../Makeconfig
-headers := dirent.h bits/dirent.h bits/dirent_ext.h
-routines := opendir closedir readdir readdir_r rewinddir \
- seekdir telldir scandir alphasort versionsort \
- getdents getdents64 dirfd readdir64 readdir64_r scandir64 \
- alphasort64 versionsort64 fdopendir \
- scandirat scandirat64 \
- scandir-cancel scandir-tail scandir64-tail
-
-tests := list tst-seekdir opendir-tst1 bug-readdir1 tst-fdopendir \
- tst-fdopendir2 tst-scandir tst-scandir64
+headers := \
+ bits/dirent.h \
+ bits/dirent_ext.h \
+ dirent.h \
+ # headers
+routines := \
+ alphasort \
+ alphasort64 \
+ closedir \
+ dirfd \
+ fdopendir \
+ getdents \
+ getdents64 \
+ opendir \
+ readdir \
+ readdir64 \
+ readdir64_r \
+ readdir_r \
+ rewinddir \
+ scandir \
+ scandir-cancel \
+ scandir-tail \
+ scandir64 \
+ scandir64-tail \
+ scandirat \
+ scandirat64 \
+ seekdir \
+ telldir \
+ versionsort \
+ versionsort64 \
+ # routines
+
+tests := \
+ bug-readdir1 \
+ list \
+ opendir-tst1 \
+ tst-fdopendir \
+ tst-fdopendir2 \
+ tst-scandir \
+ tst-scandir64 \
+ tst-seekdir \
+ # tests
CFLAGS-scandir.c += $(uses-callbacks)
CFLAGS-scandir64.c += $(uses-callbacks)

48
glibc-RHEL-111120-2.patch Normal file
View File

@ -0,0 +1,48 @@
commit 61f2c2e1d1287a791c22d86c943b44bcf66bb8ad
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Aug 30 21:52:23 2024 +0200
Linux: readdir_r needs to report getdents failures (bug 32124)
Upon error, return the errno value set by the __getdents call
in __readdir_unlocked. Previously, kernel-reported errors
were ignored.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/sysdeps/unix/sysv/linux/readdir_r.c b/sysdeps/unix/sysv/linux/readdir_r.c
index ffd5262cf5a6f885..1d595688f78ccd77 100644
--- a/sysdeps/unix/sysv/linux/readdir_r.c
+++ b/sysdeps/unix/sysv/linux/readdir_r.c
@@ -25,14 +25,22 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result)
{
struct dirent *dp;
size_t reclen;
+ int saved_errno = errno;
__libc_lock_lock (dirp->lock);
while (1)
{
+ /* If errno is changed from 0, the NULL return value indicates
+ an actual error. It overrides a pending ENAMETOOLONG error. */
+ __set_errno (0);
dp = __readdir_unlocked (dirp);
if (dp == NULL)
- break;
+ {
+ if (errno != 0)
+ dirp->errcode = errno;
+ break;
+ }
reclen = dp->d_reclen;
if (reclen <= offsetof (struct dirent, d_name) + NAME_MAX + 1)
@@ -61,6 +69,7 @@ __readdir_r (DIR *dirp, struct dirent *entry, struct dirent **result)
__libc_lock_unlock (dirp->lock);
+ __set_errno (saved_errno);
return dp != NULL ? 0 : dirp->errcode;
}

526
glibc-RHEL-111120-3.patch Normal file
View File

@ -0,0 +1,526 @@
commit 1251e9ea49fba9f53bbf4f290f3db90c01931fa7
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Sep 12 09:40:25 2024 +0200
support: Add <support/readdir.h>
It allows to read directories using the six readdir variants
without writing type-specific code or using skeleton files
that are compiled four times.
The readdir_r subtest for support_readdir_expect_error revealed
bug 32124.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/support/Makefile b/support/Makefile
index 480d3a91b8b9b625..6809c234e9314163 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -74,6 +74,7 @@ libsupport-routines = \
support_quote_blob \
support_quote_blob_wide \
support_quote_string \
+ support_readdir \
support_readdir_check \
support_readdir_r_check \
support_record_failure \
@@ -329,6 +330,7 @@ tests = \
tst-support_quote_blob \
tst-support_quote_blob_wide \
tst-support_quote_string \
+ tst-support_readdir \
tst-support_record_failure \
tst-test_compare \
tst-test_compare_blob \
diff --git a/support/readdir.h b/support/readdir.h
new file mode 100644
index 0000000000000000..7d7c7650d42efb70
--- /dev/null
+++ b/support/readdir.h
@@ -0,0 +1,85 @@
+/* Type-generic wrapper for readdir functions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef SUPPORT_READDIR_H
+#define SUPPORT_READDIR_H
+
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+__BEGIN_DECLS
+
+/* Definition independent of _FILE_OFFSET_BITS. */
+struct support_dirent
+{
+ uint64_t d_ino;
+ uint64_t d_off; /* 0 if d_off is not supported. */
+ uint32_t d_type;
+ char *d_name;
+};
+
+/* Operation to be performed by support_readdir below. */
+enum support_readdir_op
+ {
+ SUPPORT_READDIR,
+ SUPPORT_READDIR64,
+ SUPPORT_READDIR_R,
+ SUPPORT_READDIR64_R,
+ SUPPORT_READDIR64_COMPAT,
+ SUPPORT_READDIR64_R_COMPAT,
+ };
+
+/* Returns the last supported function. May exclude
+ SUPPORT_READDIR64_R_COMPAT if not implemented. */
+enum support_readdir_op support_readdir_op_last (void);
+
+/* Returns the name of the function that corresponds to the OP constant. */
+const char *support_readdir_function (enum support_readdir_op op);
+
+/* Returns the d_ino field width for OP, in bits. */
+unsigned int support_readdir_inode_width (enum support_readdir_op op);
+
+/* Returns the d_off field width for OP, in bits. Zero if not present. */
+unsigned int support_readdir_offset_width (enum support_readdir_op op);
+
+/* Returns true if OP is an _r variant with name length restrictions. */
+bool support_readdir_r_variant (enum support_readdir_op op);
+
+/* First, free E->d_name and set the field to NULL. Then call the
+ readdir variant as specified by OP. If successfully, copy fields
+ to E, make a copy of the entry name using strdup, and write its
+ addres sto E->d_name.
+
+ Return true if an entry was read, or false if the end of the
+ directory stream was reached. Terminates the process upon error.
+ The caller is expected to free E->d_name if the function is not
+ called again for this E.
+
+ Note that this function assumes that E->d_name has been initialized
+ to NULL or has been allocated by a previous call to this function. */
+bool support_readdir (DIR *stream, enum support_readdir_op op,
+ struct support_dirent *e) __nonnull ((1, 3));
+
+/* Checks that the readdir operation OP fails with errno value EXPECTED. */
+void support_readdir_expect_error (DIR *stream, enum support_readdir_op op,
+ int expected) __nonnull ((1));
+
+__END_DECLS
+
+#endif /* SUPPORT_READDIR_H */
diff --git a/support/support_readdir.c b/support/support_readdir.c
new file mode 100644
index 0000000000000000..10d808416f7a0456
--- /dev/null
+++ b/support/support_readdir.c
@@ -0,0 +1,318 @@
+/* Type-generic wrapper for readdir functions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/readdir.h>
+
+#include <dlfcn.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xdirent.h>
+
+/* Copied from <olddirent.h>. */
+struct __old_dirent64
+ {
+ __ino_t d_ino;
+ __off64_t d_off;
+ unsigned short int d_reclen;
+ unsigned char d_type;
+ char d_name[256];
+ };
+
+static struct __old_dirent64 *(*readdir64_compat) (DIR *);
+static int (*readdir64_r_compat) (DIR *, struct __old_dirent64 *,
+ struct __old_dirent64 **);
+
+static void __attribute__ ((constructor))
+init (void)
+{
+ /* These compat symbols exist on alpha, i386, m68k, powerpc, s390,
+ sparc, at the same GLIBC_2.1 version. */
+ readdir64_compat = dlvsym (RTLD_DEFAULT, "readdir64", "GLIBC_2.1");
+ readdir64_r_compat = dlvsym (RTLD_DEFAULT, "readdir64_r", "GLIBC_2.1");
+}
+
+enum support_readdir_op
+support_readdir_op_last (void)
+{
+ if (readdir64_r_compat != NULL)
+ {
+ TEST_VERIFY (readdir64_compat != NULL);
+ return SUPPORT_READDIR64_R_COMPAT;
+ }
+ else
+ return SUPPORT_READDIR64_R;
+}
+
+const char *
+support_readdir_function (enum support_readdir_op op)
+{
+ switch (op) /* Map OP to the printable name of its readdir variant. */
+ {
+ case SUPPORT_READDIR:
+ return "readdir";
+ case SUPPORT_READDIR64:
+ return "readdir64";
+ case SUPPORT_READDIR_R:
+ return "readdir_r";
+ case SUPPORT_READDIR64_R:
+ return "readdir64_r";
+ case SUPPORT_READDIR64_COMPAT:
+ return "readdir64@GLIBC_2.1"; /* Version as passed to dlvsym. */
+ case SUPPORT_READDIR64_R_COMPAT:
+ return "readdir64_r@GLIBC_2.1"; /* Version as passed to dlvsym. */
+ }
+ FAIL_EXIT1 ("invalid support_readdir_op constant: %d", op);
+}
+
+unsigned int
+support_readdir_inode_width (enum support_readdir_op op)
+{
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ case SUPPORT_READDIR_R:
+ return sizeof ((struct dirent) { 0, }.d_ino) * 8;
+ case SUPPORT_READDIR64:
+ case SUPPORT_READDIR64_R:
+ return sizeof ((struct dirent64) { 0, }.d_ino) * 8;
+ case SUPPORT_READDIR64_COMPAT:
+ case SUPPORT_READDIR64_R_COMPAT:
+ return sizeof ((struct __old_dirent64) { 0, }.d_ino) * 8;
+ }
+ FAIL_EXIT1 ("invalid support_readdir_op constant: %d", op);
+}
+
+unsigned int
+support_readdir_offset_width (enum support_readdir_op op)
+{
+#ifdef _DIRENT_HAVE_D_OFF
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ case SUPPORT_READDIR_R:
+ return sizeof ((struct dirent) { 0, }.d_off) * 8;
+ case SUPPORT_READDIR64:
+ case SUPPORT_READDIR64_R:
+ return sizeof ((struct dirent64) { 0, }.d_off) * 8;
+ case SUPPORT_READDIR64_COMPAT:
+ case SUPPORT_READDIR64_R_COMPAT:
+ return sizeof ((struct __old_dirent64) { 0, }.d_off) * 8;
+ }
+#else
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ case SUPPORT_READDIR_R:
+ case SUPPORT_READDIR64:
+ case SUPPORT_READDIR64_R:
+ case SUPPORT_READDIR64_COMPAT:
+ case SUPPORT_READDIR64_R_COMPAT:
+ return 0;
+ }
+#endif
+ FAIL_EXIT1 ("invalid support_readdir_op constant: %d", op);
+}
+
+bool
+support_readdir_r_variant (enum support_readdir_op op)
+{
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ case SUPPORT_READDIR64:
+ case SUPPORT_READDIR64_COMPAT:
+ return false;
+ case SUPPORT_READDIR_R:
+ case SUPPORT_READDIR64_R:
+ case SUPPORT_READDIR64_R_COMPAT:
+ return true;
+ }
+ FAIL_EXIT1 ("invalid support_readdir_op constant: %d", op);
+}
+
+static bool
+copy_dirent (struct support_dirent *dst, struct dirent *src)
+{
+ if (src == NULL)
+ return false;
+ dst->d_ino = src->d_ino;
+#ifdef _DIRENT_HAVE_D_OFF
+ dst->d_off = src->d_off;
+#else
+ dst->d_off = 0;
+#endif
+ dst->d_type = src->d_type;
+ dst->d_name = xstrdup (src->d_name);
+ return true;
+}
+
+static bool
+copy_dirent64 (struct support_dirent *dst, struct dirent64 *src)
+{
+ if (src == NULL)
+ return false;
+ dst->d_ino = src->d_ino;
+#ifdef _DIRENT_HAVE_D_OFF
+ dst->d_off = src->d_off;
+#else
+ dst->d_off = 0;
+#endif
+ dst->d_type = src->d_type;
+ dst->d_name = xstrdup (src->d_name);
+ return true;
+}
+
+static bool
+copy_old_dirent64 (struct support_dirent *dst, struct __old_dirent64 *src)
+{
+ if (src == NULL)
+ return false;
+ dst->d_ino = src->d_ino;
+#ifdef _DIRENT_HAVE_D_OFF
+ dst->d_off = src->d_off;
+#else
+ dst->d_off = 0;
+#endif
+ dst->d_type = src->d_type;
+ dst->d_name = xstrdup (src->d_name);
+ return true;
+}
+
+bool
+support_readdir (DIR *stream, enum support_readdir_op op,
+ struct support_dirent *e)
+{
+ free (e->d_name);
+ e->d_name = NULL;
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ return copy_dirent (e, xreaddir (stream));
+ case SUPPORT_READDIR64:
+ return copy_dirent64 (e, xreaddir64 (stream));
+
+ /* The functions readdir_r, readdir64_r were deprecated in glibc 2.24. */
+ DIAG_PUSH_NEEDS_COMMENT;
+ DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wdeprecated-declarations");
+
+ case SUPPORT_READDIR_R:
+ {
+ struct dirent buf;
+ if (!xreaddir_r (stream, &buf))
+ return false;
+ return copy_dirent (e, &buf);
+ }
+ case SUPPORT_READDIR64_R:
+ {
+ struct dirent64 buf;
+ if (!xreaddir64_r (stream, &buf))
+ return false;
+ return copy_dirent64 (e, &buf);
+ }
+
+ DIAG_POP_NEEDS_COMMENT;
+
+ case SUPPORT_READDIR64_COMPAT:
+ if (readdir64_compat == NULL)
+ FAIL_EXIT1 ("readdir64 compat function not implemented");
+ return copy_old_dirent64 (e, readdir64_compat (stream));
+
+ case SUPPORT_READDIR64_R_COMPAT:
+ {
+ if (readdir64_r_compat == NULL)
+ FAIL_EXIT1 ("readdir64_r compat function not implemented");
+ struct __old_dirent64 buf;
+ struct __old_dirent64 *e1;
+ int ret = readdir64_r_compat (stream, &buf, &e1);
+ if (ret != 0)
+ {
+ errno = ret;
+ FAIL ("readdir64_r@GLIBC_2.1: %m");
+ return false;
+ }
+ if (e1 == NULL)
+ return false;
+ return copy_old_dirent64 (e, e1);
+ }
+ }
+ FAIL_EXIT1 ("support_readdir: invalid op argument %d", (int) op);
+}
+
+void
+support_readdir_expect_error (DIR *stream, enum support_readdir_op op,
+ int expected)
+{
+ switch (op)
+ {
+ case SUPPORT_READDIR:
+ errno = 0; /* A stale errno must not match EXPECTED. */
+ TEST_VERIFY (readdir (stream) == NULL);
+ TEST_COMPARE (errno, expected);
+ return;
+ case SUPPORT_READDIR64:
+ errno = 0; /* A stale errno must not match EXPECTED. */
+ TEST_VERIFY (readdir64 (stream) == NULL);
+ TEST_COMPARE (errno, expected);
+ return;
+
+ /* The functions readdir_r, readdir64_r were deprecated in glibc 2.24. */
+ DIAG_PUSH_NEEDS_COMMENT;
+ DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wdeprecated-declarations");
+
+ case SUPPORT_READDIR_R:
+ {
+ struct dirent buf;
+ struct dirent *e;
+ errno = readdir_r (stream, &buf, &e); /* Return value is compared. */
+ TEST_COMPARE (errno, expected);
+ }
+ return;
+ case SUPPORT_READDIR64_R:
+ {
+ struct dirent64 buf;
+ struct dirent64 *e;
+ errno = readdir64_r (stream, &buf, &e); /* Return value is compared. */
+ TEST_COMPARE (errno, expected);
+ }
+ return;
+
+ DIAG_POP_NEEDS_COMMENT;
+
+ case SUPPORT_READDIR64_COMPAT:
+ if (readdir64_compat == NULL)
+ FAIL_EXIT1 ("readdir64 compat function not implemented");
+ errno = 0; /* A stale errno must not match EXPECTED. */
+ TEST_VERIFY (readdir64_compat (stream) == NULL);
+ TEST_COMPARE (errno, expected);
+ return;
+ case SUPPORT_READDIR64_R_COMPAT:
+ {
+ if (readdir64_r_compat == NULL)
+ FAIL_EXIT1 ("readdir64_r compat function not implemented");
+ struct __old_dirent64 buf;
+ struct __old_dirent64 *e;
+ errno = readdir64_r_compat (stream, &buf, &e); /* As for readdir64_r. */
+ TEST_COMPARE (errno, expected);
+ }
+ return;
+ }
+ FAIL_EXIT1 ("support_readdir_expect_error: invalid op argument %d",
+ (int) op);
+}
diff --git a/support/tst-support_readdir.c b/support/tst-support_readdir.c
new file mode 100644
index 0000000000000000..c0639571c7c3f516
--- /dev/null
+++ b/support/tst-support_readdir.c
@@ -0,0 +1,70 @@
+/* Test the support_readdir function.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/readdir.h>
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/xdirent.h>
+#include <support/xunistd.h>
+
+static int
+do_test (void)
+{
+ DIR *reference_stream = xopendir (".");
+ struct dirent64 *reference = xreaddir64 (reference_stream);
+
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last (); ++op)
+ {
+ DIR *stream = xopendir (".");
+ struct support_dirent e;
+ memset (&e, 0xcc, sizeof (e));
+ e.d_name = NULL;
+ TEST_VERIFY (support_readdir (stream, op, &e));
+ TEST_COMPARE (e.d_ino, reference->d_ino);
+ if (support_readdir_offset_width (op) != 0)
+ TEST_COMPARE (e.d_off, reference->d_off);
+ else
+ TEST_COMPARE (e.d_off, 0);
+ TEST_COMPARE (e.d_type, reference->d_type);
+ TEST_COMPARE_STRING (e.d_name, reference->d_name);
+ free (e.d_name);
+ xclosedir (stream);
+ }
+
+ xclosedir (reference_stream);
+
+ /* Error injection test. */
+ int devnull = xopen ("/dev/null", O_RDONLY, 0);
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last (); ++op)
+ {
+ DIR *stream = xopendir (".");
+ /* A descriptor incompatible with readdir. */
+ xdup2 (devnull, dirfd (stream));
+ errno = -1;
+ support_readdir_expect_error (stream, op, ENOTDIR);
+ xclosedir (stream);
+ }
+ xclose (devnull);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

34
glibc-RHEL-111120-4.patch Normal file
View File

@ -0,0 +1,34 @@
commit c9154cad66aa0b11ede62cc9190d3485c5ef6941
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Sep 12 18:26:04 2024 +0200
support: Fix Hurd build of tst-support_readdir
Check for the availability of the d_off member at compile time, not
run time.
Fixes commit 1251e9ea49fba9f53bbf4f290f3db90c01931fa7
("support: Add <support/readdir.h>").
diff --git a/support/tst-support_readdir.c b/support/tst-support_readdir.c
index c0639571c7c3f516..66be94fa802e727a 100644
--- a/support/tst-support_readdir.c
+++ b/support/tst-support_readdir.c
@@ -39,10 +39,13 @@ do_test (void)
e.d_name = NULL;
TEST_VERIFY (support_readdir (stream, op, &e));
TEST_COMPARE (e.d_ino, reference->d_ino);
- if (support_readdir_offset_width (op) != 0)
- TEST_COMPARE (e.d_off, reference->d_off);
- else
- TEST_COMPARE (e.d_off, 0);
+#ifdef _DIRENT_HAVE_D_OFF
+ TEST_VERIFY (support_readdir_offset_width (op) != 0);
+ TEST_COMPARE (e.d_off, reference->d_off);
+#else
+ TEST_COMPARE (support_readdir_offset_width (op), 0);
+ TEST_COMPARE (e.d_off, 0);
+#endif
TEST_COMPARE (e.d_type, reference->d_type);
TEST_COMPARE_STRING (e.d_name, reference->d_name);
free (e.d_name);

136
glibc-RHEL-111120-5.patch Normal file
View File

@ -0,0 +1,136 @@
commit 4c09aa31b1aeea1329674109eb02d4ba506b0ad2
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 21 19:32:34 2024 +0200
dirent: Add tst-closedir-leaks
It verifies that closedir deallocates memory and closes
file descriptors.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/dirent/Makefile b/dirent/Makefile
index 556f759f653349bd..f9056724f03125c0 100644
--- a/dirent/Makefile
+++ b/dirent/Makefile
@@ -58,6 +58,7 @@ tests := \
bug-readdir1 \
list \
opendir-tst1 \
+ tst-closedir-leaks \
tst-fdopendir \
tst-fdopendir2 \
tst-scandir \
@@ -65,6 +66,18 @@ tests := \
tst-seekdir \
# tests
+ifeq ($(run-built-tests),yes)
+ifneq ($(PERL),no)
+generated += \
+ $(objpfx)tst-closedir-leaks-mem.out \
+ # generated
+
+tests-special += \
+ $(objpfx)tst-closedir-leaks-mem.out \
+ # tests-special
+endif # $(PERL) ! no
+endif # $(run-built-tests) == yes
+
CFLAGS-scandir.c += $(uses-callbacks)
CFLAGS-scandir64.c += $(uses-callbacks)
CFLAGS-scandir-tail.c += $(uses-callbacks)
@@ -74,3 +87,10 @@ CFLAGS-dirfd.c += $(config-cflags-wno-ignored-attributes)
include ../Rules
opendir-tst1-ARGS = --test-dir=${common-objpfx}dirent
+
+tst-closedir-leaks-ENV += MALLOC_TRACE=$(objpfx)tst-closedir-leaks.mtrace \
+ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
+
+$(objpfx)tst-closedir-leaks-mem.out: $(objpfx)tst-closedir-leaks.out
+ $(common-objpfx)malloc/mtrace $(objpfx)tst-closedir-leaks.mtrace > $@; \
+ $(evaluate-test)
diff --git a/dirent/tst-closedir-leaks.c b/dirent/tst-closedir-leaks.c
new file mode 100644
index 0000000000000000..d9de119b637ea623
--- /dev/null
+++ b/dirent/tst-closedir-leaks.c
@@ -0,0 +1,77 @@
+/* Test for resource leaks in closedir.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <mcheck.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/readdir.h>
+#include <support/xdirent.h>
+#include <support/xunistd.h>
+
+static void
+one_test (enum support_readdir_op op, unsigned int read_limit,
+ bool use_fdopendir)
+{
+ struct support_descriptors *fds = support_descriptors_list ();
+ struct support_dirent e = { 0, };
+
+ DIR *stream;
+ if (use_fdopendir)
+ {
+ int fd = xopen (".", O_RDONLY | O_DIRECTORY, 0);
+ stream = xfdopendir (fd);
+ /* The descriptor fd will be closed by closedir below. */
+ }
+ else
+ stream = xopendir (".");
+ for (unsigned int i = 0; i < read_limit; ++i)
+ if (!support_readdir (stream, op, &e))
+ break;
+ TEST_COMPARE (closedir (stream), 0);
+
+ free (e.d_name);
+ support_descriptors_check (fds);
+ support_descriptors_free (fds);
+}
+
+static int
+do_test (void)
+{
+ mtrace ();
+
+ for (int use_fdopendir = 0; use_fdopendir < 2; ++use_fdopendir)
+ {
+ /* No reads, operation does not matter. */
+ one_test (SUPPORT_READDIR, 0, use_fdopendir);
+
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last();
+ ++op)
+ {
+ one_test (op, 1, use_fdopendir);
+ one_test (op, UINT_MAX, use_fdopendir); /* Unlimited reads. */
+ }
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>

389
glibc-RHEL-111120-6.patch Normal file
View File

@ -0,0 +1,389 @@
commit e92718552e1d17b8eccbffb88bf5bbb2235c4596
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 21 19:32:34 2024 +0200
Linux: Use readdir64_r for compat __old_readdir64_r (bug 32128)
It is not necessary to do the conversion at the getdents64
layer for readdir64_r. Doing it piecewise for readdir64
is slightly simpler and allows deleting __old_getdents64.
This fixes bug 32128 because readdir64_r handles the length
check correctly.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/sysdeps/unix/sysv/linux/getdents64.c b/sysdeps/unix/sysv/linux/getdents64.c
index 227fbf21aef294f6..795bd935f0e95126 100644
--- a/sysdeps/unix/sysv/linux/getdents64.c
+++ b/sysdeps/unix/sysv/linux/getdents64.c
@@ -33,100 +33,3 @@ __getdents64 (int fd, void *buf, size_t nbytes)
}
libc_hidden_def (__getdents64)
weak_alias (__getdents64, getdents64)
-
-#if _DIRENT_MATCHES_DIRENT64
-strong_alias (__getdents64, __getdents)
-#else
-# include <shlib-compat.h>
-
-# if SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2)
-# include <olddirent.h>
-# include <unistd.h>
-
-static ssize_t
-handle_overflow (int fd, __off64_t offset, ssize_t count)
-{
- /* If this is the first entry in the buffer, we can report the
- error. */
- if (offset == 0)
- {
- __set_errno (EOVERFLOW);
- return -1;
- }
-
- /* Otherwise, seek to the overflowing entry, so that the next call
- will report the error, and return the data read so far. */
- if (__lseek64 (fd, offset, SEEK_SET) != 0)
- return -1;
- return count;
-}
-
-ssize_t
-__old_getdents64 (int fd, char *buf, size_t nbytes)
-{
- /* We do not move the individual directory entries. This is only
- possible if the target type (struct __old_dirent64) is smaller
- than the source type. */
- _Static_assert (offsetof (struct __old_dirent64, d_name)
- <= offsetof (struct dirent64, d_name),
- "__old_dirent64 is larger than dirent64");
- _Static_assert (__alignof__ (struct __old_dirent64)
- <= __alignof__ (struct dirent64),
- "alignment of __old_dirent64 is larger than dirent64");
-
- ssize_t retval = INLINE_SYSCALL_CALL (getdents64, fd, buf, nbytes);
- if (retval > 0)
- {
- /* This is the marker for the first entry. Offset 0 is reserved
- for the first entry (see rewinddir). Here, we use it as a
- marker for the first entry in the buffer. We never actually
- seek to offset 0 because handle_overflow reports the error
- directly, so it does not matter that the offset is incorrect
- if entries have been read from the descriptor before (so that
- the descriptor is not actually at offset 0). */
- __off64_t previous_offset = 0;
-
- char *p = buf;
- char *end = buf + retval;
- while (p < end)
- {
- struct dirent64 *source = (struct dirent64 *) p;
-
- /* Copy out the fixed-size data. */
- __ino_t ino = source->d_ino;
- __off64_t offset = source->d_off;
- unsigned int reclen = source->d_reclen;
- unsigned char type = source->d_type;
-
- /* Check for ino_t overflow. */
- if (__glibc_unlikely (ino != source->d_ino))
- return handle_overflow (fd, previous_offset, p - buf);
-
- /* Convert to the target layout. Use a separate struct and
- memcpy to side-step aliasing issues. */
- struct __old_dirent64 result;
- result.d_ino = ino;
- result.d_off = offset;
- result.d_reclen = reclen;
- result.d_type = type;
-
- /* Write the fixed-sized part of the result to the
- buffer. */
- size_t result_name_offset = offsetof (struct __old_dirent64, d_name);
- memcpy (p, &result, result_name_offset);
-
- /* Adjust the position of the name if necessary. Copy
- everything until the end of the record, including the
- terminating NUL byte. */
- if (result_name_offset != offsetof (struct dirent64, d_name))
- memmove (p + result_name_offset, source->d_name,
- reclen - offsetof (struct dirent64, d_name));
-
- p += reclen;
- previous_offset = offset;
- }
- }
- return retval;
-}
-# endif /* SHLIB_COMPAT(libc, GLIBC_2_1, GLIBC_2_2) */
-#endif /* _DIRENT_MATCHES_DIRENT64 */
diff --git a/sysdeps/unix/sysv/linux/olddirent.h b/sysdeps/unix/sysv/linux/olddirent.h
index 239f790648c6e6b6..065ca41a6e93e1c9 100644
--- a/sysdeps/unix/sysv/linux/olddirent.h
+++ b/sysdeps/unix/sysv/linux/olddirent.h
@@ -34,8 +34,6 @@ extern struct __old_dirent64 *__old_readdir64 (DIR *__dirp);
libc_hidden_proto (__old_readdir64);
extern int __old_readdir64_r (DIR *__dirp, struct __old_dirent64 *__entry,
struct __old_dirent64 **__result);
-extern __ssize_t __old_getdents64 (int __fd, char *__buf, size_t __nbytes)
- attribute_hidden;
int __old_scandir64 (const char * __dir,
struct __old_dirent64 *** __namelist,
int (*__selector) (const struct __old_dirent64 *),
diff --git a/sysdeps/unix/sysv/linux/readdir64.c b/sysdeps/unix/sysv/linux/readdir64.c
index e6f5108c0a809353..e6b8867b7a361a62 100644
--- a/sysdeps/unix/sysv/linux/readdir64.c
+++ b/sysdeps/unix/sysv/linux/readdir64.c
@@ -26,17 +26,13 @@
#undef __readdir
#undef readdir
-/* Read a directory entry from DIRP. */
-struct dirent64 *
-__readdir64 (DIR *dirp)
+/* Read a directory entry from DIRP. No locking. */
+static struct dirent64 *
+__readdir64_unlocked (DIR *dirp)
{
struct dirent64 *dp;
int saved_errno = errno;
-#if IS_IN (libc)
- __libc_lock_lock (dirp->lock);
-#endif
-
if (dirp->offset >= dirp->size)
{
/* We've emptied out our buffer. Refill it. */
@@ -53,9 +49,6 @@ __readdir64 (DIR *dirp)
do not set errno in that case, to indicate success. */
if (bytes == 0 || errno == ENOENT)
__set_errno (saved_errno);
-#if IS_IN (libc)
- __libc_lock_unlock (dirp->lock);
-#endif
return NULL;
}
dirp->size = (size_t) bytes;
@@ -68,10 +61,16 @@ __readdir64 (DIR *dirp)
dirp->offset += dp->d_reclen;
dirp->filepos = dp->d_off;
-#if IS_IN (libc)
- __libc_lock_unlock (dirp->lock);
-#endif
+ return dp;
+}
+/* Read a directory entry from DIRP. */
+struct dirent64 *
+__readdir64 (DIR *dirp)
+{
+ __libc_lock_lock (dirp->lock);
+ struct dirent64 *dp = __readdir64_unlocked (dirp);
+ __libc_lock_unlock (dirp->lock);
return dp;
}
libc_hidden_def (__readdir64)
@@ -99,45 +98,54 @@ __old_readdir64 (DIR *dirp)
struct __old_dirent64 *dp;
int saved_errno = errno;
-#if IS_IN (libc)
__libc_lock_lock (dirp->lock);
-#endif
- if (dirp->offset >= dirp->size)
+ while (1)
{
- /* We've emptied out our buffer. Refill it. */
+ errno = 0;
+ struct dirent64 *newdp = __readdir64_unlocked (dirp);
+ if (newdp == NULL)
+ {
+ if (errno == 0 && dirp->errcode != 0)
+ __set_errno (dirp->errcode);
+ else if (errno == 0)
+ __set_errno (saved_errno);
+ dp = NULL;
+ break;
+ }
- size_t maxread = dirp->allocation;
- ssize_t bytes;
+ /* Convert to the target layout. Use a separate struct and
+ memcpy to side-step aliasing issues. */
+ struct __old_dirent64 result;
+ result.d_ino = newdp->d_ino;
+ result.d_off = newdp->d_off;
+ result.d_reclen = newdp->d_reclen;
+ result.d_type = newdp->d_type;
- bytes = __old_getdents64 (dirp->fd, dirp->data, maxread);
- if (bytes <= 0)
+ /* Check for ino_t overflow. */
+ if (__glibc_unlikely (result.d_ino != newdp->d_ino))
{
- /* Linux may fail with ENOENT on some file systems if the
- directory inode is marked as dead (deleted). POSIX
- treats this as a regular end-of-directory condition, so
- do not set errno in that case, to indicate success. */
- if (bytes == 0 || errno == ENOENT)
- __set_errno (saved_errno);
-#if IS_IN (libc)
- __libc_lock_unlock (dirp->lock);
-#endif
- return NULL;
+ dirp->errcode = ENAMETOOLONG;
+ continue;
}
- dirp->size = (size_t) bytes;
- /* Reset the offset into the buffer. */
- dirp->offset = 0;
- }
+ /* Overwrite the fixed-sized part. */
+ dp = (struct __old_dirent64 *) newdp;
+ memcpy (dp, &result, offsetof (struct __old_dirent64, d_name));
- dp = (struct __old_dirent64 *) &dirp->data[dirp->offset];
- dirp->offset += dp->d_reclen;
- dirp->filepos = dp->d_off;
+ /* Move the name. */
+ _Static_assert (offsetof (struct __old_dirent64, d_name)
+ <= offsetof (struct dirent64, d_name),
+ "old struct must be smaller");
+ if (offsetof (struct __old_dirent64, d_name)
+ != offsetof (struct dirent64, d_name))
+ memmove (dp->d_name, newdp->d_name, strlen (newdp->d_name) + 1);
-#if IS_IN (libc)
- __libc_lock_unlock (dirp->lock);
-#endif
+ __set_errno (saved_errno);
+ break;
+ }
+ __libc_lock_unlock (dirp->lock);
return dp;
}
libc_hidden_def (__old_readdir64)
diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c
index e87882ee06d6deaf..7ad7e5945bc833c6 100644
--- a/sysdeps/unix/sysv/linux/readdir64_r.c
+++ b/sysdeps/unix/sysv/linux/readdir64_r.c
@@ -135,91 +135,37 @@ attribute_compat_text_section
__old_readdir64_r (DIR *dirp, struct __old_dirent64 *entry,
struct __old_dirent64 **result)
{
- struct __old_dirent64 *dp;
- size_t reclen;
- const int saved_errno = errno;
- int ret;
-
- __libc_lock_lock (dirp->lock);
-
- do
+ while (1)
{
- if (dirp->offset >= dirp->size)
- {
- /* We've emptied out our buffer. Refill it. */
-
- size_t maxread = dirp->allocation;
- ssize_t bytes;
-
- maxread = dirp->allocation;
-
- bytes = __old_getdents64 (dirp->fd, dirp->data, maxread);
- if (bytes <= 0)
- {
- /* On some systems getdents fails with ENOENT when the
- open directory has been rmdir'd already. POSIX.1
- requires that we treat this condition like normal EOF. */
- if (bytes < 0 && errno == ENOENT)
- {
- bytes = 0;
- __set_errno (saved_errno);
- }
- if (bytes < 0)
- dirp->errcode = errno;
+ struct dirent64 new_entry;
+ struct dirent64 *newp;
+ int ret = __readdir64_r (dirp, &new_entry, &newp);
- dp = NULL;
- break;
- }
- dirp->size = (size_t) bytes;
-
- /* Reset the offset into the buffer. */
- dirp->offset = 0;
+ if (ret != 0)
+ return ret;
+ else if (newp == NULL)
+ {
+ *result = NULL;
+ return 0;
}
-
- dp = (struct __old_dirent64 *) &dirp->data[dirp->offset];
-
- reclen = dp->d_reclen;
-
- dirp->offset += reclen;
-
- dirp->filepos = dp->d_off;
-
- if (reclen > offsetof (struct __old_dirent64, d_name) + NAME_MAX + 1)
+ else
{
- /* The record is very long. It could still fit into the
- caller-supplied buffer if we can skip padding at the
- end. */
- size_t namelen = _D_EXACT_NAMLEN (dp);
- if (namelen <= NAME_MAX)
- reclen = offsetof (struct __old_dirent64, d_name) + namelen + 1;
- else
+ entry->d_ino = newp->d_ino;
+ if (entry->d_ino != newp->d_ino)
{
- /* The name is too long. Ignore this file. */
- dirp->errcode = ENAMETOOLONG;
- dp->d_ino = 0;
+ dirp->errcode = EOVERFLOW;
continue;
}
+ size_t namelen = strlen (newp->d_name);
+ entry->d_off = newp->d_off;
+ entry->d_reclen = (offsetof (struct __old_dirent64, d_name)
+ + namelen + 1);
+ entry->d_type = newp->d_type;
+ memcpy (entry->d_name, newp->d_name, namelen + 1);
+ *result = entry;
+ return 0;
}
-
- /* Skip deleted and ignored files. */
- }
- while (dp->d_ino == 0);
-
- if (dp != NULL)
- {
- *result = memcpy (entry, dp, reclen);
- entry->d_reclen = reclen;
- ret = 0;
}
- else
- {
- *result = NULL;
- ret = dirp->errcode;
- }
-
- __libc_lock_unlock (dirp->lock);
-
- return ret;
}
compat_symbol (libc, __old_readdir64_r, readdir64_r, GLIBC_2_1);

261
glibc-RHEL-111120-7.patch Normal file
View File

@ -0,0 +1,261 @@
commit 4ec355af454695556db1212d1c9ca9c3789cddf4
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 21 19:32:34 2024 +0200
dirent: Add tst-readdir-long
It tests long names and ENAMETOOLONG handling, specifically
for readdir_r. This is a regression test for bug 14699,
bug 32124, and bug 32128.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/dirent/Makefile b/dirent/Makefile
index f9056724f03125c0..91edcf9e70622938 100644
--- a/dirent/Makefile
+++ b/dirent/Makefile
@@ -61,6 +61,7 @@ tests := \
tst-closedir-leaks \
tst-fdopendir \
tst-fdopendir2 \
+ tst-readdir-long \
tst-scandir \
tst-scandir64 \
tst-seekdir \
diff --git a/dirent/tst-readdir-long.c b/dirent/tst-readdir-long.c
new file mode 100644
index 0000000000000000..409318fa52fc664f
--- /dev/null
+++ b/dirent/tst-readdir-long.c
@@ -0,0 +1,231 @@
+/* Test readdir (+variants) behavior with file names of varying length.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/fuse.h>
+#include <support/support.h>
+#include <support/xdirent.h>
+#include <support/readdir.h>
+
+/* If positive, at this length an EMSGSIZE error is injected. */
+static _Atomic int inject_error_at_length;
+
+/* Return a file name, LENGTH bytes long. */
+static char *
+name_of_length (size_t length)
+{
+ char *result = xmalloc (length + 1);
+ unsigned int prefix = snprintf (result, length + 1, "%zu-", length);
+ for (size_t i = prefix; i < length; ++i)
+ result[i] = 'A' + ((length + i) % 26);
+ result[length] = '\0';
+ return result;
+}
+
+/* Add the directory entry at OFFSET to the stream D. */
+static uint64_t
+add_directory_entry (struct support_fuse_dirstream *d, uint64_t offset)
+{
+ unsigned int length = offset + 1;
+ if (length > 1000)
+ /* Longer than what is possible to produce with 256
+ UTF-8-encoded Unicode code points. */
+ return 0;
+
+ char *to_free = NULL;
+ const char *name;
+ uint64_t ino = 1000 + length; /* Arbitrary value, distinct from 1. */
+ uint32_t type = DT_REG;
+ if (offset <= 1)
+ {
+ type = DT_DIR;
+ name = ".." + !offset; /* "." or "..". */
+ ino = 1;
+ }
+ else if (length == 1000)
+ name = "short";
+ else
+ {
+ to_free = name_of_length (length);
+ name = to_free;
+ }
+
+ ++offset;
+ bool added = support_fuse_dirstream_add (d, ino, offset, type, name);
+ free (to_free);
+ if (added)
+ return offset;
+ else
+ return 0;
+}
+
+/* Set to true if getdents64 should produce only one entry. */
+static _Atomic bool one_entry_per_getdents64;
+
+static void
+fuse_thread (struct support_fuse *f, void *closure)
+{
+ struct fuse_in_header *inh;
+ while ((inh = support_fuse_next (f)) != NULL)
+ {
+ if (support_fuse_handle_mountpoint (f)
+ || (inh->nodeid == 1 && support_fuse_handle_directory (f)))
+ continue;
+ switch (inh->opcode)
+ {
+ case FUSE_READDIR:
+ if (inh->nodeid == 1)
+ {
+ uint64_t offset = support_fuse_cast (READ, inh)->offset;
+ if (inject_error_at_length == offset + 1)
+ support_fuse_reply_error (f, EMSGSIZE);
+ else
+ {
+ struct support_fuse_dirstream *d
+ = support_fuse_prepare_readdir (f);
+ while (true)
+ {
+ offset = add_directory_entry (d, offset);
+ if (offset == 0 || one_entry_per_getdents64
+ /* Error will be reported at next READDIR. */
+ || offset + 1 == inject_error_at_length)
+ break;
+ }
+ support_fuse_reply_prepared (f);
+ }
+ }
+ else
+ support_fuse_reply_error (f, EIO);
+ break;
+ default:
+ FAIL ("unexpected event %s", support_fuse_opcode (inh->opcode));
+ support_fuse_reply_error (f, EIO);
+ }
+ }
+}
+
+/* Run the tests for the specified readdir variant OP. */
+static void
+run_readdir_tests (struct support_fuse *f, enum support_readdir_op op)
+{
+ printf ("info: testing %s (inject_error=%d unbuffered=%d)\n",
+ support_readdir_function (op), inject_error_at_length,
+ (int) one_entry_per_getdents64);
+
+ bool testing_r = support_readdir_r_variant (op);
+
+ DIR *dir = xopendir (support_fuse_mountpoint (f));
+ struct support_dirent e = { 0, };
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE (e.d_ino, 1);
+ TEST_COMPARE_STRING (e.d_name, ".");
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE (e.d_ino, 1);
+ TEST_COMPARE_STRING (e.d_name, "..");
+
+ for (unsigned int i = 3; i < 1000; ++i)
+ {
+ if (i == inject_error_at_length)
+ /* Error expected below. */
+ break;
+
+ if (i >= sizeof ((struct dirent) { 0, }.d_name) && testing_r)
+ /* This is a readdir_r test. The longer names are not
+ available because they do not fit into struct dirent. */
+ break;
+
+ char *expected_name = name_of_length (i);
+ TEST_COMPARE (strlen (expected_name), i);
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE (e.d_ino, 1000 + i);
+ TEST_COMPARE_STRING (e.d_name, expected_name);
+ free (expected_name);
+ }
+
+ if (inject_error_at_length == 0)
+ {
+ /* Check that the ENAMETOOLONG error does not prevent reading a
+ later short name. */
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE (e.d_ino, 2000);
+ TEST_COMPARE_STRING (e.d_name, "short");
+
+ if (testing_r)
+ /* An earlier name was too long. */
+ support_readdir_expect_error (dir, op, ENAMETOOLONG);
+ else
+ /* Entire directory read without error. */
+ TEST_VERIFY (!support_readdir (dir, op, &e));
+ }
+ else
+ support_readdir_expect_error (dir, op, EMSGSIZE);
+
+ free (e.d_name);
+ xclosedir (dir);
+}
+
+/* Run all readdir variants for both fully-buffered and unbuffered
+ (one-at-a-time) directory streams. */
+static void
+run_fully_buffered_and_singleton_buffers (struct support_fuse *f)
+{
+ for (int do_one_entry = 0; do_one_entry < 2; ++do_one_entry)
+ {
+ one_entry_per_getdents64 = do_one_entry;
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last();
+ ++op)
+ run_readdir_tests (f, op);
+ }
+}
+
+static int
+do_test (void)
+{
+ /* Smoke test for name_of_length. */
+ {
+ char *name = name_of_length (5);
+ TEST_COMPARE_STRING (name, "5-HIJ");
+ free (name);
+
+ name = name_of_length (6);
+ TEST_COMPARE_STRING (name, "6-IJKL");
+ free (name);
+ }
+
+ support_fuse_init ();
+ struct support_fuse *f = support_fuse_mount (fuse_thread, NULL);
+
+ run_fully_buffered_and_singleton_buffers (f);
+
+ inject_error_at_length = 100;
+ run_fully_buffered_and_singleton_buffers (f);
+
+ inject_error_at_length = 300;
+ run_fully_buffered_and_singleton_buffers (f);
+
+ support_fuse_unmount (f);
+ return 0;
+}
+
+#include <support/test-driver.c>

236
glibc-RHEL-111120-8.patch Normal file
View File

@ -0,0 +1,236 @@
commit 6aa1645f669322b36bda8e1fded6fd524d3e08ff
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 21 19:32:34 2024 +0200
dirent: Add tst-rewinddir
It verifies that rewinddir allows restarting the directory
iteration.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/dirent/Makefile b/dirent/Makefile
index 91edcf9e70622938..045c786575a7d5ff 100644
--- a/dirent/Makefile
+++ b/dirent/Makefile
@@ -62,6 +62,7 @@ tests := \
tst-fdopendir \
tst-fdopendir2 \
tst-readdir-long \
+ tst-rewinddir \
tst-scandir \
tst-scandir64 \
tst-seekdir \
diff --git a/dirent/tst-rewinddir.c b/dirent/tst-rewinddir.c
new file mode 100644
index 0000000000000000..1479766ebe8fc911
--- /dev/null
+++ b/dirent/tst-rewinddir.c
@@ -0,0 +1,207 @@
+/* Test for rewinddir, using FUSE.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/fuse.h>
+#include <support/readdir.h>
+#include <support/support.h>
+#include <support/xdirent.h>
+
+/* Return the file name at the indicated directory offset. */
+static char *
+name_at_offset (unsigned int offset)
+{
+ if (offset <= 1)
+ return xstrdup (".." + !offset); /* "." or "..". */
+ else
+ /* Pad the name with a lot of zeros, so that the dirent buffer gets
+ filled more quickly. */
+ return xasprintf ("file%0240u", offset);
+}
+
+/* This many directory entries, including "." and "..". */
+enum { directory_entries = 200 };
+
+/* Add the directory entry at OFFSET to the stream D. */
+static uint64_t
+add_directory_entry (struct support_fuse_dirstream *d, uint64_t offset)
+{
+ if (offset >= directory_entries)
+ return 0;
+
+ char *name = name_at_offset (offset);
+ uint64_t ino = 1000 + offset; /* Arbitrary value, distinct from 1. */
+ uint32_t type = DT_REG;
+ if (offset <= 1)
+ {
+ type = DT_DIR;
+ ino = 1;
+ }
+
+ ++offset;
+ bool added = support_fuse_dirstream_add (d, ino, offset, type, name);
+ free (name);
+ if (added)
+ return offset;
+ else
+ return 0;
+}
+
+/* Set to true if getdents64 should produce only one entry. */
+static bool one_entry_per_getdents64;
+
+static void
+fuse_thread (struct support_fuse *f, void *closure)
+{
+ struct fuse_in_header *inh;
+ while ((inh = support_fuse_next (f)) != NULL)
+ {
+ if (support_fuse_handle_mountpoint (f)
+ || (inh->nodeid == 1 && support_fuse_handle_directory (f)))
+ continue;
+ switch (inh->opcode)
+ {
+ case FUSE_READDIR:
+ if (inh->nodeid == 1)
+ {
+ uint64_t offset = support_fuse_cast (READ, inh)->offset;
+ struct support_fuse_dirstream *d
+ = support_fuse_prepare_readdir (f);
+ while (true)
+ {
+ offset = add_directory_entry (d, offset);
+ if (offset == 0 || one_entry_per_getdents64)
+ break;
+ }
+ support_fuse_reply_prepared (f);
+ }
+ else
+ support_fuse_reply_error (f, EIO);
+ break;
+ default:
+ FAIL ("unexpected event %s", support_fuse_opcode (inh->opcode));
+ support_fuse_reply_error (f, EIO);
+ }
+ }
+}
+
+/* Lists the entire directory from start to end. */
+static void
+verify_directory (DIR *dir, enum support_readdir_op op)
+{
+ struct support_dirent e = { 0, };
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, ".");
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "..");
+ for (int i = 2; i < directory_entries; ++i)
+ {
+ char *expected = name_at_offset (i);
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, expected);
+ free (expected);
+ }
+ TEST_VERIFY (!support_readdir (dir, op, &e));
+ free (e.d_name);
+}
+
+/* Run tests with rewinding after REWIND_AT readdir calls. */
+static void
+rewind_after (unsigned int rewind_at)
+{
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last (); ++op)
+ {
+ printf ("info: testing %s (rewind_at=%u)\n",
+ support_readdir_function (op), rewind_at);
+
+ struct support_fuse *f = support_fuse_mount (fuse_thread, NULL);
+ DIR *dir = xopendir (support_fuse_mountpoint (f));
+ struct support_dirent e = { 0, };
+
+ switch (rewind_at)
+ {
+ case 0:
+ break;
+ case 1:
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, ".");
+ break;
+ default:
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, ".");
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "..");
+ for (int i = 2; i < directory_entries; ++i)
+ {
+ if (i == rewind_at)
+ break;
+ char *expected = name_at_offset (i);
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, expected);
+ free (expected);
+ }
+ break;
+ }
+
+ errno = 0;
+ rewinddir (dir);
+ TEST_COMPARE (errno, 0);
+ verify_directory (dir, op);
+
+ free (e.d_name);
+ xclosedir (dir);
+ support_fuse_unmount (f);
+ }
+}
+
+static int
+do_test (void)
+{
+ support_fuse_init ();
+
+ /* One pass without rewinding to verify that the generated directory
+ content matches expectations. */
+ {
+ struct support_fuse *f = support_fuse_mount (fuse_thread, NULL);
+ DIR *dir = xopendir (support_fuse_mountpoint (f));
+ verify_directory (dir, SUPPORT_READDIR64);
+ xclosedir (dir);
+ support_fuse_unmount (f);
+ }
+
+ for (int do_unbuffered = 0; do_unbuffered < 2; ++do_unbuffered)
+ {
+ one_entry_per_getdents64 = do_unbuffered;
+
+ for (int i = 0; i < 20; ++i)
+ rewind_after (i);
+ rewind_after (50);
+ rewind_after (100);
+ rewind_after (150);
+ rewind_after (180);
+ rewind_after (199);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>

214
glibc-RHEL-111120-9.patch Normal file
View File

@ -0,0 +1,214 @@
commit 6f3f6c506cdaf981a4374f1f12863b98ac7fea1a
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 21 19:32:34 2024 +0200
Linux: readdir64_r should not skip d_ino == 0 entries (bug 32126)
This is the same bug as bug 12165, but for readdir_r. The
regression test covers both bug 12165 and bug 32126.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/dirent/Makefile b/dirent/Makefile
index 045c786575a7d5ff..11b772e3abb700eb 100644
--- a/dirent/Makefile
+++ b/dirent/Makefile
@@ -62,6 +62,7 @@ tests := \
tst-fdopendir \
tst-fdopendir2 \
tst-readdir-long \
+ tst-readdir-zero-inode \
tst-rewinddir \
tst-scandir \
tst-scandir64 \
diff --git a/dirent/tst-readdir-zero-inode.c b/dirent/tst-readdir-zero-inode.c
new file mode 100644
index 0000000000000000..af9fb946abe6c483
--- /dev/null
+++ b/dirent/tst-readdir-zero-inode.c
@@ -0,0 +1,134 @@
+/* Test that readdir does not skip entries with d_ino == 0 (bug 12165).
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/fuse.h>
+#include <support/readdir.h>
+#include <support/xdirent.h>
+
+/* Add the directory entry at OFFSET to the stream D. */
+static uint64_t
+add_directory_entry (struct support_fuse_dirstream *d, uint64_t offset)
+{
+ bool added = false;
+ ++offset;
+ switch (offset - 1)
+ {
+ case 0:
+ added = support_fuse_dirstream_add (d, 1, offset, DT_DIR, ".");
+ break;
+ case 1:
+ added = support_fuse_dirstream_add (d, 1, offset, DT_DIR, "..");
+ break;
+ case 2:
+ added = support_fuse_dirstream_add (d, 2, offset, DT_REG, "before");
+ break;
+ case 3:
+ added = support_fuse_dirstream_add (d, 0, offset, DT_REG, "zero");
+ break;
+ case 4:
+ added = support_fuse_dirstream_add (d, 3, offset, DT_REG, "after");
+ break;
+ }
+ if (added)
+ return offset;
+ else
+ return 0;
+}
+
+/* Set to true if getdents64 should produce only one entry. */
+static bool one_entry_per_getdents64;
+
+static void
+fuse_thread (struct support_fuse *f, void *closure)
+{
+ struct fuse_in_header *inh;
+ while ((inh = support_fuse_next (f)) != NULL)
+ {
+ if (support_fuse_handle_mountpoint (f)
+ || (inh->nodeid == 1 && support_fuse_handle_directory (f)))
+ continue;
+ switch (inh->opcode)
+ {
+ case FUSE_READDIR:
+ if (inh->nodeid == 1)
+ {
+ uint64_t offset = support_fuse_cast (READ, inh)->offset;
+ struct support_fuse_dirstream *d
+ = support_fuse_prepare_readdir (f);
+ while (true)
+ {
+ offset = add_directory_entry (d, offset);
+ if (offset == 0 || one_entry_per_getdents64)
+ break;
+ }
+ support_fuse_reply_prepared (f);
+ }
+ else
+ support_fuse_reply_error (f, EIO);
+ break;
+ default:
+ FAIL ("unexpected event %s", support_fuse_opcode (inh->opcode));
+ support_fuse_reply_error (f, EIO);
+ }
+ }
+}
+
+static int
+do_test (void)
+{
+ support_fuse_init ();
+
+ for (enum support_readdir_op op = 0; op <= support_readdir_op_last (); ++op)
+ {
+ struct support_fuse *f = support_fuse_mount (fuse_thread, NULL);
+ DIR *dir = xopendir (support_fuse_mountpoint (f));
+ struct support_dirent e = { 0, };
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, ".");
+ TEST_COMPARE (e.d_ino, 1);
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "..");
+ TEST_COMPARE (e.d_ino, 1);
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "before");
+ TEST_COMPARE (e.d_ino, 2);
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "zero");
+ TEST_COMPARE (e.d_ino, 0);
+
+ TEST_VERIFY (support_readdir (dir, op, &e));
+ TEST_COMPARE_STRING (e.d_name, "after");
+ TEST_COMPARE (e.d_ino, 3);
+
+ TEST_VERIFY (!support_readdir (dir, op, &e));
+
+ free (e.d_name);
+ xclosedir (dir);
+ support_fuse_unmount (f);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/readdir64_r.c b/sysdeps/unix/sysv/linux/readdir64_r.c
index 7ad7e5945bc833c6..c42a161ffcf9fd70 100644
--- a/sysdeps/unix/sysv/linux/readdir64_r.c
+++ b/sysdeps/unix/sysv/linux/readdir64_r.c
@@ -37,7 +37,7 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result)
__libc_lock_lock (dirp->lock);
- do
+ while (1)
{
if (dirp->offset >= dirp->size)
{
@@ -79,26 +79,21 @@ __readdir64_r (DIR *dirp, struct dirent64 *entry, struct dirent64 **result)
dirp->filepos = dp->d_off;
- if (reclen > offsetof (struct dirent64, d_name) + NAME_MAX + 1)
+ if (reclen <= offsetof (struct dirent64, d_name) + NAME_MAX + 1)
+ break;
+
+ /* The record is very long. It could still fit into the
+ caller-supplied buffer if we can skip padding at the end. */
+ size_t namelen = _D_EXACT_NAMLEN (dp);
+ if (namelen <= NAME_MAX)
{
- /* The record is very long. It could still fit into the
- caller-supplied buffer if we can skip padding at the
- end. */
- size_t namelen = _D_EXACT_NAMLEN (dp);
- if (namelen <= NAME_MAX)
- reclen = offsetof (struct dirent64, d_name) + namelen + 1;
- else
- {
- /* The name is too long. Ignore this file. */
- dirp->errcode = ENAMETOOLONG;
- dp->d_ino = 0;
- continue;
- }
+ reclen = offsetof (struct dirent64, d_name) + namelen + 1;
+ break;
}
- /* Skip deleted and ignored files. */
+ /* The name is too long. Ignore this file. */
+ dirp->errcode = ENAMETOOLONG;
}
- while (dp->d_ino == 0);
if (dp != NULL)
{

View File

@ -13,7 +13,7 @@ Date: Fri Sep 12 21:33:34 2025 +0200
Reviewed-by: Collin Funk <collin.funk1@gmail.com>
diff --git a/nss/getXXbyYY_r.c b/nss/getXXbyYY_r.c
index fe7d5b7d0eddfb05..3a15b1a4ae151fcc 100644
index eae6c3480e..2b0735fb6a 100644
--- a/nss/getXXbyYY_r.c
+++ b/nss/getXXbyYY_r.c
@@ -157,19 +157,15 @@ __merge_einval (LOOKUP_TYPE *a,

378
glibc-RHEL-115819-1.patch Normal file
View File

@ -0,0 +1,378 @@
commit a7fe3e805d2ee128ac5f43b2a24201726d41cc04
Author: Carlos O'Donell <carlos@redhat.com>
Date: Wed Jun 19 11:48:05 2024 -0400
Fix conditionals on mtrace-based tests (bug 31892)
The conditionals for several mtrace-based tests in catgets, elf, libio,
malloc, misc, nptl, posix, and stdio-common were incorrect leading to
test failures when bootstrapping glibc without perl.
The correct conditional for mtrace-based tests requires three checks:
first checking for run-built-tests, then build-shared, and lastly that
PERL is not equal to "no" (missing perl).
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Conflicts:
stdio-common/Makefile
(fixup context)
diff --git a/catgets/Makefile b/catgets/Makefile
index 24b4560d5fefcd08..40c65eac950ee662 100644
--- a/catgets/Makefile
+++ b/catgets/Makefile
@@ -43,8 +43,12 @@ tests-special += \
$(objpfx)test-gencat.out \
$(objpfx)test1.cat \
$(objpfx)test2.cat \
- $(objpfx)tst-catgets-mem.out
# tests-special
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += $(objpfx)tst-catgets-mem.out
+endif
+endif
endif
gencat-modules = xmalloc
@@ -68,9 +72,17 @@ generated += \
test1.h \
test2.cat \
test2.h \
+ # generated
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+generated += \
tst-catgets-mem.out \
tst-catgets.mtrace \
# generated
+endif
+endif
+endif
generated-dirs += \
de \
diff --git a/elf/Makefile b/elf/Makefile
index fb35102f827b96cd..fc721b4f6379cb07 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -647,13 +647,19 @@ $(objpfx)tst-valgrind-smoke.out: tst-valgrind-smoke.sh $(objpfx)ld.so $(objpfx)v
tests += $(tests-execstack-$(have-z-execstack))
ifeq ($(run-built-tests),yes)
tests-special += \
- $(objpfx)noload-mem.out \
$(objpfx)tst-ldconfig-X.out \
$(objpfx)tst-ldconfig-p.out \
$(objpfx)tst-ldconfig-soname.out \
- $(objpfx)tst-leaks1-mem.out \
$(objpfx)tst-rtld-help.out \
# tests-special
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+ $(objpfx)noload-mem.out \
+ $(objpfx)tst-leaks1-mem.out \
+ # tests-special
+endif
+endif
endif
tlsmod17a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
tlsmod18a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
diff --git a/libio/Makefile b/libio/Makefile
index b189455bb9b8fd1b..03ffb659e8c0f347 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -264,15 +264,28 @@ tst-bz22415-ENV = MALLOC_TRACE=$(objpfx)tst-bz22415.mtrace \
tst-bz24228-ENV = MALLOC_TRACE=$(objpfx)tst-bz24228.mtrace \
LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
-generated += test-fmemopen.mtrace test-fmemopen.check
-generated += tst-fdopen-seek-failure.mtrace tst-fdopen-seek-failure.check
-generated += tst-fopenloc.mtrace tst-fopenloc.check
-generated += tst-bz22415.mtrace tst-bz22415.check
-
aux := fileops genops stdfiles stdio strops
+ifeq ($(run-built-tests),yes)
+ifeq ($(build-shared),yes)
+ifneq ($(PERL),no)
+generated += \
+ test-fmemopen.check \
+ test-fmemopen.mtrace \
+ tst-bz22415.check \
+ tst-bz22415.mtrace \
+ tst-bz24228.check \
+ tst-bz24228.mtrace \
+ tst-fdopen-seek-failure.check \
+ tst-fdopen-seek-failure.mtrace \
+ tst-fopenloc.check \
+ tst-fopenloc.mtrace \
+ # generated
+endif
+endif
+endif
+
ifeq ($(build-shared),yes)
-generated += tst-bz24228.mtrace tst-bz24228.check
aux += oldfileops oldstdfiles
tests += \
tst-stderr-compat \
@@ -289,16 +302,23 @@ shared-only-routines = oldiofopen oldiofdopen oldiofclose oldfileops \
ifeq ($(run-built-tests),yes)
tests-special += \
- $(objpfx)test-fmemopen-mem.out \
$(objpfx)test-freopen.out \
- $(objpfx)tst-bz22415-mem.out \
- $(objpfx)tst-fdopen-seek-failure-mem.out \
# tests-special
ifeq (yes,$(build-shared))
# Run tst-fopenloc-cmp.out and tst-openloc-mem.out only if shared
# library is enabled since they depend on tst-fopenloc.out.
-tests-special += $(objpfx)tst-fopenloc-cmp.out $(objpfx)tst-fopenloc-mem.out \
- $(objpfx)tst-bz24228-mem.out
+tests-special += $(objpfx)tst-fopenloc-cmp.out
+ifeq ($(build-shared),yes)
+ifneq ($(PERL),no)
+tests-special += \
+ $(objpfx)test-fmemopen-mem.out \
+ $(objpfx)tst-bz22415-mem.out \
+ $(objpfx)tst-bz24228-mem.out \
+ $(objpfx)tst-fdopen-seek-failure-mem.out \
+ $(objpfx)tst-fopenloc-mem.out \
+ # tests-special
+endif
+endif
endif
tests += \
diff --git a/misc/Makefile b/misc/Makefile
index 6aa74d332b40ca94..a932b1aab461bc7d 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -214,12 +214,18 @@ routines_no_fortify += \
syslog \
# routines_no_fortify
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
generated += \
tst-allocate_once-mem.out \
tst-allocate_once.mtrace \
tst-error1-mem.out \
tst-error1.mtrace \
# generated
+endif
+endif
+endif
aux := init-misc
install-lib := libg.a
@@ -293,8 +299,14 @@ xtests += \
# xtests
ifeq ($(run-built-tests),yes)
-tests-special += $(objpfx)tst-error1-mem.out \
- $(objpfx)tst-allocate_once-mem.out
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+ $(objpfx)tst-allocate_once-mem.out \
+ $(objpfx)tst-error1-mem.out \
+ # tests-special
+endif
+endif
endif
tests-container := \
diff --git a/nptl/Makefile b/nptl/Makefile
index c9d9079cdb8a5643..34c80f6f38261669 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -558,10 +558,12 @@ xtests-static += tst-setuid1-static
ifeq ($(run-built-tests),yes)
tests-special += \
$(objpfx)tst-oddstacklimit.out \
- $(objpfx)tst-stack3-mem.out \
# tests-special
ifeq ($(build-shared),yes)
tests-special += $(objpfx)tst-tls6.out
+ifneq ($(PERL),no)
+tests-special += $(objpfx)tst-stack3-mem.out
+endif
endif
endif
@@ -619,10 +621,17 @@ tst-stack3-ENV = MALLOC_TRACE=$(objpfx)tst-stack3.mtrace \
$(objpfx)tst-stack3-mem.out: $(objpfx)tst-stack3.out
$(common-objpfx)malloc/mtrace $(objpfx)tst-stack3.mtrace > $@; \
$(evaluate-test)
+
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
generated += \
tst-stack3-mem.out \
tst-stack3.mtrace \
# generated
+endif
+endif
+endif
tst-stack4mod.sos=$(shell for i in 0 1 2 3 4 5 6 7 8 9 10 \
11 12 13 14 15 16 17 18 19; do \
diff --git a/posix/Makefile b/posix/Makefile
index 18ddb8c34176848e..830278a4233d7234 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -419,6 +419,17 @@ generated += \
$(addprefix wordexp-test-result, 1 2 3 4 5 6 7 8 9 10) \
annexc \
annexc.out \
+ getconf.speclist \
+ ptestcases.h \
+ testcases.h \
+ tst-getconf.out \
+ wordexp-tst.out \
+ # generated
+
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+generated += \
bug-ga2-mem.out \
bug-ga2.mtrace \
bug-glob2-mem.out \
@@ -431,23 +442,22 @@ generated += \
bug-regex21.mtrace \
bug-regex31-mem.out \
bug-regex31.mtrace \
+ bug-regex36-mem.out \
bug-regex36.mtrace \
- getconf.speclist \
- ptestcases.h \
- testcases.h \
tst-boost-mem.out \
tst-boost.mtrace \
tst-fnmatch-mem.out \
tst-fnmatch.mtrace \
- tst-getconf.out \
tst-pcre-mem.out \
tst-pcre.mtrace \
tst-rxspencer-no-utf8-mem.out \
tst-rxspencer-no-utf8.mtrace \
tst-vfork3-mem.out \
tst-vfork3.mtrace \
- wordexp-tst.out \
# generated
+endif
+endif
+endif
ifeq ($(run-built-tests),yes)
ifeq (yes,$(build-shared))
@@ -462,6 +472,9 @@ endif
# XXX Please note that for now we ignore the result of this test.
tests-special += $(objpfx)annexc.out
ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-getconf.out
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
tests-special += \
$(objpfx)bug-ga2-mem.out \
$(objpfx)bug-glob2-mem.out \
@@ -472,13 +485,14 @@ tests-special += \
$(objpfx)bug-regex36-mem.out \
$(objpfx)tst-boost-mem.out \
$(objpfx)tst-fnmatch-mem.out \
- $(objpfx)tst-getconf.out \
$(objpfx)tst-glob-tilde-mem.out \
$(objpfx)tst-pcre-mem.out \
$(objpfx)tst-rxspencer-no-utf8-mem.out \
$(objpfx)tst-vfork3-mem.out \
# tests-special
endif
+endif
+endif
include ../Rules
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 74512f20d39f8fec..2a01b1de6639bb6f 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -232,10 +232,6 @@ tests := \
tst-popen \
tst-popen2 \
tst-printf-binary \
- tst-printf-bz18872 \
- tst-printf-bz25691 \
- tst-printf-fp-free \
- tst-printf-fp-leak \
tst-printf-intn \
tst-printf-oct \
tst-printf-round \
@@ -266,7 +262,6 @@ tests := \
tst-vfprintf-mbs-prec \
tst-vfprintf-user-type \
tst-vfprintf-width-i18n \
- tst-vfprintf-width-prec \
tst-vfprintf-width-prec-alloc \
tst-wc-printf \
tstdiomisc \
@@ -275,6 +270,20 @@ tests := \
xbug \
# tests
+ifeq ($(run-built-tests),yes)
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests += \
+ tst-printf-bz18872 \
+ tst-printf-bz25691 \
+ tst-printf-fp-free \
+ tst-printf-fp-leak \
+ tst-vfprintf-width-prec \
+ # tests
+endif
+endif
+endif
+
tests-container += \
tst-popen3 \
tst-setvbuf2 \
@@ -302,14 +311,19 @@ test-srcs = \
ifeq ($(run-built-tests),yes)
tests-special += \
- $(objpfx)tst-printf-bz18872-mem.out \
- $(objpfx)tst-printf-bz25691-mem.out \
- $(objpfx)tst-printf-fp-free-mem.out \
- $(objpfx)tst-printf-fp-leak-mem.out \
$(objpfx)tst-printf.out \
$(objpfx)tst-printfsz-islongdouble.out \
$(objpfx)tst-setvbuf1-cmp.out \
$(objpfx)tst-unbputc.out \
+ # tests-special
+
+ifeq (yes,$(build-shared))
+ifneq ($(PERL),no)
+tests-special += \
+ $(objpfx)tst-printf-bz18872-mem.out \
+ $(objpfx)tst-printf-bz25691-mem.out \
+ $(objpfx)tst-printf-fp-free-mem.out \
+ $(objpfx)tst-printf-fp-leak-mem.out \
$(objpfx)tst-ungetc-leak-mem.out \
$(objpfx)tst-vfprintf-width-prec-mem.out \
# tests-special
@@ -330,6 +344,8 @@ generated += \
tst-vfprintf-width-prec-mem.out \
tst-vfprintf-width-prec.mtrace \
# generated
+endif
+endif
endif # $(run-built-tests)
tests-special += $(objpfx)tst-errno-manual.out

630
glibc-RHEL-115819-2.patch Normal file
View File

@ -0,0 +1,630 @@
commit 7f04bb4e49413bd57ac3215f3480b09ae7131968
Author: Joseph Myers <josmyers@redhat.com>
Date: Wed Aug 21 19:58:14 2024 +0000
Add more tests of getline
There is very little test coverage for getline (only a minimal
stdio-common/tstgetln.c which doesn't verify anything about the
results of the getline calls). Add some more thorough tests
(generally using fopencookie for convenience in testing various cases
for what the input and possible errors / EOF in the file read might
look like).
Note the following regarding testing of error cases:
* Nothing is said in the specifications about what if anything might
be written into the buffer, and whether it might be reallocated, in
error cases. The expectation of the tests (required to avoid memory
leaks on error) is that at least on error cases, the invariant that
lineptr points to at least n bytes is maintained.
* The optional EOVERFLOW error case specified in POSIX, "The number of
bytes to be written into the buffer, including the delimiter
character (if encountered), would exceed {SSIZE_MAX}.", doesn't seem
practically testable, as any case reading so many characters (half
the address space) would also be liable to run into allocation
failure (ENOMEM) along the way.
* If a read error occurs part way through reading an input line, it
seems unclear whether a partial line should be returned by getline
(avoid input getting lost), which is what glibc does at least in the
fopencookie case used in this test, or whether getline should return
-1 (error) (so avoiding the program misbehaving by processing a
truncated line as if it were complete). (There was a short,
inconclusive discussion about this on the Austin Group list on 9-10
November 2014.)
* The POSIX specification of getline inherits errors from fgetc. I
didn't try to cover fgetc errors systematically, just one example of
such an error.
Tested for x86_64 and x86.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 2a01b1de6639bb6f..98fcd8a38523b728 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -221,6 +221,8 @@ tests := \
tst-fread \
tst-fseek \
tst-fwrite \
+ tst-getline \
+ tst-getline-enomem \
tst-gets \
tst-grouping \
tst-grouping2 \
@@ -320,6 +322,8 @@ tests-special += \
ifeq (yes,$(build-shared))
ifneq ($(PERL),no)
tests-special += \
+ $(objpfx)tst-getline-enomem-mem.out \
+ $(objpfx)tst-getline-mem.out \
$(objpfx)tst-printf-bz18872-mem.out \
$(objpfx)tst-printf-bz25691-mem.out \
$(objpfx)tst-printf-fp-free-mem.out \
@@ -329,6 +333,10 @@ tests-special += \
# tests-special
generated += \
+ tst-getline-enomem-mem.out \
+ tst-getline-enomem.mtrace \
+ tst-getline-mem.out \
+ tst-getline.mtrace \
tst-printf-bz18872-mem.out \
tst-printf-bz18872.c \
tst-printf-bz18872.mtrace \
@@ -438,6 +446,12 @@ tst-scanf-bz27650-ENV = \
tst-ungetc-leak-ENV = \
MALLOC_TRACE=$(objpfx)tst-ungetc-leak.mtrace \
LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-getline-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-getline.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-getline-enomem-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-getline-enomem.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
$(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc
$(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \
diff --git a/stdio-common/tst-getline-enomem.c b/stdio-common/tst-getline-enomem.c
new file mode 100644
index 0000000000000000..7fc70ea9b51d1262
--- /dev/null
+++ b/stdio-common/tst-getline-enomem.c
@@ -0,0 +1,78 @@
+/* Test getline: ENOMEM on allocation failure.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+
+/* Produce a stream of test data based on data in COOKIE (ignored),
+ storing up to SIZE bytes in BUF. */
+
+static ssize_t
+io_read (void *cookie, char *buf, size_t size)
+{
+ memset (buf, 'x', size);
+ return size;
+}
+
+/* Set up a test stream with fopencookie. */
+
+static FILE *
+open_test_stream (void)
+{
+ static cookie_io_functions_t io_funcs = { .read = io_read };
+ static int cookie;
+ FILE *fp = fopencookie (&cookie, "r", io_funcs);
+ TEST_VERIFY_EXIT (fp != NULL);
+ return fp;
+}
+
+int
+do_test (void)
+{
+ FILE *fp;
+ char *lineptr = NULL;
+ size_t size = 0;
+ ssize_t ret;
+ mtrace ();
+ /* Test ENOMEM (and error indicator for stream set) for memory
+ allocation failure. */
+ verbose_printf ("Testing memory allocation failure\n");
+ fp = open_test_stream ();
+ struct rlimit limit;
+ TEST_VERIFY_EXIT (getrlimit (RLIMIT_AS, &limit) == 0);
+ limit.rlim_cur = 32 * 1024 * 1024;
+ TEST_VERIFY_EXIT (setrlimit (RLIMIT_AS, &limit) == 0);
+ errno = 0;
+ ret = getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, ENOMEM);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ free (lineptr);
+ fclose (fp);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-getline.c b/stdio-common/tst-getline.c
new file mode 100644
index 0000000000000000..29eb7cec0f344872
--- /dev/null
+++ b/stdio-common/tst-getline.c
@@ -0,0 +1,451 @@
+/* Test getline.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+#include <support/support.h>
+#include <support/xstdio.h>
+#include <support/xunistd.h>
+
+static struct test_data
+{
+ /* Input test data for fopencookie stream. */
+ const char *in_data;
+
+ /* The amount of test data left. */
+ size_t in_data_left;
+
+ /* Error number for forcing an error on next read. */
+ int in_error;
+
+ /* Error number for forcing an error (rather than EOF) after all
+ bytes read. */
+ int in_error_after;
+} the_cookie;
+
+/* Produce a stream of test data based on data in COOKIE, storing up
+ to SIZE bytes in BUF. */
+
+static ssize_t
+io_read (void *cookie, char *buf, size_t size)
+{
+ struct test_data *p = cookie;
+ if (p->in_error)
+ {
+ errno = p->in_error;
+ return -1;
+ }
+ if (size > p->in_data_left)
+ size = p->in_data_left;
+ memcpy (buf, p->in_data, size);
+ p->in_data += size;
+ p->in_data_left -= size;
+ if (p->in_data_left == 0)
+ p->in_error = p->in_error_after;
+ return size;
+}
+
+/* Set up a test stream with fopencookie. */
+
+static FILE *
+open_test_stream (const char *in_data, size_t size)
+{
+ static cookie_io_functions_t io_funcs = { .read = io_read };
+ the_cookie.in_data = in_data;
+ the_cookie.in_data_left = size;
+ the_cookie.in_error = 0;
+ the_cookie.in_error_after = 0;
+ FILE *fp = fopencookie (&the_cookie, "r", io_funcs);
+ TEST_VERIFY_EXIT (fp != NULL);
+ return fp;
+}
+
+/* Set up a test stream with fopencookie, using data from a string
+ literal. */
+#define OPEN_TEST_STREAM(IN_DATA) open_test_stream (IN_DATA, sizeof (IN_DATA))
+
+/* Wrap getline to verify that (as per the glibc manual), *LINEPTR is
+ returned as non-null and with at least *N bytes (even on error or
+ EOF). Also clear errno for the benefit of tests that check the
+ value of errno after the call. */
+
+ssize_t
+wrap_getline (char **lineptr, size_t *n, FILE *stream)
+{
+ errno = 0;
+ ssize_t ret = getline (lineptr, n, stream);
+ if (lineptr != NULL && n != NULL)
+ {
+ TEST_VERIFY (*lineptr != NULL);
+ TEST_VERIFY (malloc_usable_size (*lineptr) >= *n);
+ }
+ return ret;
+}
+
+int
+do_test (void)
+{
+ FILE *fp;
+ char *lineptr = NULL;
+ size_t size = 0;
+ ssize_t ret;
+ mtrace ();
+ /* Test failure with EINVAL (and error indicator for stream set) if
+ lineptr is a null pointer. */
+ verbose_printf ("Testing lineptr == NULL\n");
+ fp = OPEN_TEST_STREAM ("test");
+ ret = wrap_getline (NULL, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EINVAL);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test failure with EINVAL (and error indicator for stream set) if
+ n is a null pointer. */
+ verbose_printf ("Testing n == NULL\n");
+ fp = OPEN_TEST_STREAM ("test");
+ ret = wrap_getline (&lineptr, NULL, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EINVAL);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test failure with EINVAL (and error indicator for stream set) if
+ both lineptr and n are null pointers. */
+ verbose_printf ("Testing lineptr == NULL and n == NULL\n");
+ fp = OPEN_TEST_STREAM ("test");
+ ret = wrap_getline (NULL, NULL, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EINVAL);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test normal line, fitting in available space (including case with
+ null bytes). */
+ verbose_printf ("Testing normal nonempty input\n");
+ lineptr = xmalloc (10);
+ size = 10;
+ fp = OPEN_TEST_STREAM ("foo\nbar\0\n\0baz\nte\0st\n");
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "foo\n", 5);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "bar\0\n", 6);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "\0baz\n", 6);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 6);
+ TEST_COMPARE_BLOB (lineptr, 7, "te\0st\n", 7);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 1);
+ TEST_COMPARE_BLOB (lineptr, 1, "", 1);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ fclose (fp);
+ /* Test normal line, with reallocation (including case with null bytes). */
+ verbose_printf ("Testing normal nonempty input with reallocation\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = OPEN_TEST_STREAM ("foo\nbar\0\n\0baz\nte\0st\n"
+ "foo\nbar\0\n\0baz\nte\0st\n");
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "foo\n", 5);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "bar\0\n", 6);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "\0baz\n", 6);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 6);
+ TEST_COMPARE_BLOB (lineptr, 7, "te\0st\n", 7);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "foo\n", 5);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "bar\0\n", 6);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "\0baz\n", 6);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 6);
+ TEST_COMPARE_BLOB (lineptr, 7, "te\0st\n", 7);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 1);
+ TEST_COMPARE_BLOB (lineptr, 1, "", 1);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ fclose (fp);
+ /* Test EOF before delimiter but after some bytes read, fitting in
+ available space (including case with null bytes). */
+ verbose_printf ("Testing EOF before delimiter\n");
+ free (lineptr);
+ lineptr = xmalloc (10);
+ size = 10;
+ fp = open_test_stream ("foo", 3);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 3);
+ TEST_COMPARE_BLOB (lineptr, 4, "foo", 4);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (10);
+ size = 10;
+ fp = open_test_stream ("bar\0", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "bar\0", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (10);
+ size = 10;
+ fp = open_test_stream ("\0baz", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "\0baz", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (10);
+ size = 10;
+ fp = open_test_stream ("te\0st", 5);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "te\0st", 6);
+ fclose (fp);
+ /* Test EOF before delimiter but after some bytes read, with
+ reallocation (including case with null bytes). */
+ verbose_printf ("Testing EOF before delimiter with reallocation\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("foo", 3);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 3);
+ TEST_COMPARE_BLOB (lineptr, 4, "foo", 4);
+ fclose (fp);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("bar\0", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "bar\0", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("\0baz", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "\0baz", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("te\0st", 5);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "te\0st", 6);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ fp = open_test_stream ("foo", 3);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 3);
+ TEST_COMPARE_BLOB (lineptr, 4, "foo", 4);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ fp = open_test_stream ("bar\0", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "bar\0", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ fp = open_test_stream ("\0baz", 4);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 4);
+ TEST_COMPARE_BLOB (lineptr, 5, "\0baz", 5);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ fp = open_test_stream ("te\0st", 5);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 5);
+ TEST_COMPARE_BLOB (lineptr, 6, "te\0st", 6);
+ fclose (fp);
+ /* Test EOF with no bytes read (nothing is specified about anything
+ written to the buffer), including EOF again when already at end
+ of file. */
+ verbose_printf ("Testing EOF with no bytes read\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("", 0);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ fclose (fp);
+ free (lineptr);
+ lineptr = xmalloc (1);
+ size = 1;
+ fp = open_test_stream ("", 0);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (ferror (fp), 0);
+ TEST_COMPARE (!!feof (fp), 1);
+ fclose (fp);
+ /* Test error occurring with no bytes read, including calling
+ wrap_getline again while the file is in error state. */
+ verbose_printf ("Testing error with no bytes read\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("", 0);
+ the_cookie.in_error = EINVAL;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EINVAL);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ /* Make sure error state is sticky. */
+ the_cookie.in_error = 0;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test error occurring after some bytes read. Specifications are
+ ambiguous here; at least in the fopencookie case used for
+ testing, glibc returns the partial line (but with the error
+ indicator on the stream set). */
+ verbose_printf ("Testing error after some bytes read\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = open_test_stream ("foo", 3);
+ the_cookie.in_error_after = EINVAL;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, 3);
+ TEST_COMPARE_BLOB (lineptr, 4, "foo", 4);
+ TEST_COMPARE (errno, EINVAL);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ /* Make sure error state is sticky. */
+ the_cookie.in_error = 0;
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test EBADF error as a representative example of an fgetc error
+ resulting in an error from wrap_getline. We don't try to cover all
+ error cases for fgetc here. */
+ verbose_printf ("Testing EBADF error\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ fp = xfopen ("/dev/null", "r");
+ xclose (fileno (fp));
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EBADF);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ /* Test EAGAIN error as an example of an fgetc error on a valid file
+ descriptor. */
+ verbose_printf ("Testing EAGAIN error\n");
+ free (lineptr);
+ lineptr = NULL;
+ size = 0;
+ int pipefd[2];
+ xpipe (pipefd);
+ ret = fcntl (pipefd[0], F_SETFL, O_NONBLOCK);
+ TEST_VERIFY_EXIT (ret == 0);
+ fp = fdopen (pipefd[0], "r");
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EAGAIN);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ /* Make sure error state is sticky (even after more data is
+ available to read). */
+ xwrite (pipefd[1], "x\n", 2);
+ ret = wrap_getline (&lineptr, &size, fp);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (!!ferror (fp), 1);
+ TEST_COMPARE (feof (fp), 0);
+ fclose (fp);
+ free (lineptr);
+ return 0;
+}
+
+#include <support/test-driver.c>

79
glibc-RHEL-115820-1.patch Normal file
View File

@ -0,0 +1,79 @@
commit 3e4a01870ef9605ccf6475215a4b32aa86d5d206
Author: Aaron Merey <amerey@redhat.com>
Date: Thu Aug 29 12:02:25 2024 -0400
Test fclose on an unopened file.
Add new file libio/tst-fclose-unopened.c that tests whether fclose on
an unopened file returns EOF.
Calling fclose on unopened files normally causes a use-after-free bug,
however the standard streams are an exception since they are not
deallocated by fclose.
fclose returning EOF for unopened files is not part of the external
contract but there are dependencies on this behaviour. For example,
gnulib's close_stdout in lib/closeout.c.
Tested for x86_64.
Signed-off-by: Aaron Merey <amerey@redhat.com>
diff --git a/libio/Makefile b/libio/Makefile
index 6cf5b3464b5bb4bf..3252c8c52fc3c773 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -95,6 +95,7 @@ tests = \
tst-eof \
tst-ext \
tst-ext2 \
+ tst-fclose-unopened \
tst-fdopen-seek-failure \
tst-fgetc-after-eof \
tst-fgetwc \
diff --git a/libio/tst-fclose-unopened.c b/libio/tst-fclose-unopened.c
new file mode 100644
index 0000000000000000..1f1cad042d8d72bf
--- /dev/null
+++ b/libio/tst-fclose-unopened.c
@@ -0,0 +1,40 @@
+/* Test using fclose on an unopened file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <support/check.h>
+
+/* Verify that fclose on an unopened file returns EOF. This is not part
+ of the fclose external contract but there are dependancies on this
+ behaviour. */
+
+static int
+do_test (void)
+{
+ TEST_COMPARE (fclose (stdin), 0);
+
+ /* Attempt to close the unopened file and verify that EOF is returned.
+ Calling fclose on a file twice normally causes a use-after-free bug,
+ however the standard streams are an exception since they are not
+ deallocated by fclose. */
+ TEST_COMPARE (fclose (stdin), EOF);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

155
glibc-RHEL-115820-2.patch Normal file
View File

@ -0,0 +1,155 @@
commit 35dc62de3d5d73a91d4ca8fa9799b510a34d170d
Author: Aaron Merey <amerey@redhat.com>
Date: Thu Sep 19 09:53:23 2024 -0400
Add another test for fclose on an unopened file
Add new file libio/tst-fclose-unopened2.c that tests whether fclose on an
unopened file returns EOF.
This test differs from tst-fclose-unopened.c by ensuring the file's buffer
is allocated prior to double-fclose. A comment in tst-fclose-unopened.c
now clarifies that it is testing a file with an unallocated buffer.
Calling fclose on unopened files normally causes a use-after-free bug,
however the standard streams are an exception since they are not
deallocated by fclose.
Tested for x86_64.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
diff --git a/libio/Makefile b/libio/Makefile
index 3252c8c52fc3c773..efa60525b39331da 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -96,6 +96,7 @@ tests = \
tst-ext \
tst-ext2 \
tst-fclose-unopened \
+ tst-fclose-unopened2 \
tst-fdopen-seek-failure \
tst-fgetc-after-eof \
tst-fgetwc \
@@ -254,6 +255,9 @@ LDFLAGS-tst-bz24228 = -Wl,--version-script=tst-bz24228.map
tst_wprintf2-ARGS = "Some Text"
+tst-fclose-unopened2-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-fclose-unopened2.mtrace \
+ LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
test-fmemopen-ENV = MALLOC_TRACE=$(objpfx)test-fmemopen.mtrace \
LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
tst-fdopen-seek-failure-ENV = \
@@ -278,6 +282,8 @@ generated += \
tst-bz22415.mtrace \
tst-bz24228.check \
tst-bz24228.mtrace \
+ tst-fclose-unopened2.check \
+ tst-fclose-unopened2.mtrace \
tst-fdopen-seek-failure.check \
tst-fdopen-seek-failure.mtrace \
tst-fopenloc.check \
@@ -316,6 +322,7 @@ tests-special += \
$(objpfx)test-fmemopen-mem.out \
$(objpfx)tst-bz22415-mem.out \
$(objpfx)tst-bz24228-mem.out \
+ $(objpfx)tst-fclose-unopened2-mem.out \
$(objpfx)tst-fdopen-seek-failure-mem.out \
$(objpfx)tst-fopenloc-mem.out \
# tests-special
@@ -403,6 +410,11 @@ $(objpfx)test-fmemopen-mem.out: $(objpfx)test-fmemopen.out
$(common-objpfx)malloc/mtrace $(objpfx)test-fmemopen.mtrace > $@; \
$(evaluate-test)
+$(objpfx)tst-fclose-unopened2-mem.out: $(objpfx)tst-fclose-unopened2.out
+ $(common-objpfx)malloc/mtrace \
+ $(objpfx)tst-fclose-unopened2.mtrace > $@; \
+ $(evaluate-test)
+
$(objpfx)tst-fdopen-seek-failure-mem.out: $(objpfx)tst-fdopen-seek-failure.out
$(common-objpfx)malloc/mtrace \
$(objpfx)tst-fdopen-seek-failure.mtrace > $@; \
diff --git a/libio/tst-fclose-unopened.c b/libio/tst-fclose-unopened.c
index 1f1cad042d8d72bf..4fed2ffdfe8cf9b4 100644
--- a/libio/tst-fclose-unopened.c
+++ b/libio/tst-fclose-unopened.c
@@ -19,9 +19,11 @@
#include <stdio.h>
#include <support/check.h>
-/* Verify that fclose on an unopened file returns EOF. This is not part
- of the fclose external contract but there are dependancies on this
- behaviour. */
+/* Verify that fclose on an unopened file returns EOF. This test uses
+ a file with an unallocated buffer.
+
+ This is not part of the fclose external contract but there are
+ dependencies on this behaviour. */
static int
do_test (void)
diff --git a/libio/tst-fclose-unopened2.c b/libio/tst-fclose-unopened2.c
new file mode 100644
index 0000000000000000..1e99d9dc3d561b80
--- /dev/null
+++ b/libio/tst-fclose-unopened2.c
@@ -0,0 +1,51 @@
+/* Test using fclose on an unopened file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <mcheck.h>
+#include <stdio.h>
+#include <support/check.h>
+
+/* Verify that fclose on an unopened file returns EOF. This test uses
+ a file with an allocated buffer.
+
+ This is not part of the fclose external contract but there are
+ dependencies on this behaviour. */
+
+static int
+do_test (void)
+{
+ mtrace ();
+
+ /* Input file tst-fclose-unopened2.input has 6 bytes plus newline. */
+ char buf[6];
+
+ /* Read from the file to ensure its internal buffer is allocated. */
+ TEST_COMPARE (fread (buf, 1, sizeof (buf), stdin), sizeof (buf));
+
+ TEST_COMPARE (fclose (stdin), 0);
+
+ /* Attempt to close the unopened file and verify that EOF is returned.
+ Calling fclose on a file twice normally causes a use-after-free bug,
+ however the standard streams are an exception since they are not
+ deallocated by fclose. */
+ TEST_COMPARE (fclose (stdin), EOF);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/libio/tst-fclose-unopened2.input b/libio/tst-fclose-unopened2.input
new file mode 100644
index 0000000000000000..399f9ba41aff870b
--- /dev/null
+++ b/libio/tst-fclose-unopened2.input
@@ -0,0 +1 @@
+fclose

261
glibc-RHEL-115823-1.patch Normal file
View File

@ -0,0 +1,261 @@
commit 96d0bf98cafd0b63721f369ca21ec64590551d47
Author: Joseph Myers <josmyers@redhat.com>
Date: Tue Sep 3 13:53:01 2024 +0000
Add support/ code for checking file contents
For use in freopen tests, add various support/ helper interfaces for
use in checking file contents.
Tested for x86_64.
diff --git a/support/Makefile b/support/Makefile
index 6b859e85ddb157f4..aee37b9c82e94499 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -49,6 +49,8 @@ libsupport-routines = \
support_check_stat_fd \
support_check_stat_path \
support_chroot \
+ support_compare_file_bytes \
+ support_compare_file_string \
support_copy_file \
support_copy_file_range \
support_create_timer \
@@ -66,6 +68,8 @@ libsupport-routines = \
support_isolate_in_subprocess \
support_mutex_pi_monotonic \
support_need_proc \
+ support_open_and_compare_file_bytes \
+ support_open_and_compare_file_string \
support_openpty \
support_path_support_time64 \
support_paths \
diff --git a/support/file_contents.h b/support/file_contents.h
new file mode 100644
index 0000000000000000..9b2d750aae8a885a
--- /dev/null
+++ b/support/file_contents.h
@@ -0,0 +1,63 @@
+/* Functionality for checking file contents.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef SUPPORT_FILE_CONTENTS_H
+#define SUPPORT_FILE_CONTENTS_H
+
+#include <support/check.h>
+#include <stdio.h>
+
+__BEGIN_DECLS
+
+/* Check that an already-open file has exactly the given bytes,
+ starting at the current location in the file. The file position
+ indicator is updated to point after the bytes compared. Return 0
+ if equal, 1 otherwise or on read error. */
+int support_compare_file_bytes (FILE *fp, const char *contents, size_t length);
+
+/* Check that an already-open file has exactly the given string as
+ contents, starting at the current offset. The file position
+ indicator is updated to point after the bytes compared. Return 0
+ if equal, 1 otherwise or on read error. */
+int support_compare_file_string (FILE *fp, const char *contents);
+
+/* Check that a not-currently-open file has exactly the given bytes.
+ Return 0 if equal, 1 otherwise or on read error. */
+int support_open_and_compare_file_bytes (const char *file,
+ const char *contents,
+ size_t length);
+
+/* Check that a not-currently-open file has exactly the given string
+ as contents, comparing from the start of the file. Return 0 if
+ equal, 1 otherwise or on read error. */
+int support_open_and_compare_file_string (const char *file,
+ const char *contents);
+
+/* Compare bytes read from an open file with the given string. The
+ file position indicator is updated to point after the bytes
+ compared. */
+#define TEST_COMPARE_FILE_STRING(FP, CONTENTS) \
+ TEST_COMPARE (support_compare_file_string (FP, CONTENTS), 0)
+
+/* Read a file and compare bytes read from it with the given string. */
+#define TEST_OPEN_AND_COMPARE_FILE_STRING(FILE, CONTENTS) \
+ TEST_COMPARE (support_open_and_compare_file_string (FILE, CONTENTS), 0)
+
+__END_DECLS
+
+#endif /* SUPPORT_FILE_CONTENTS_H */
diff --git a/support/support_compare_file_bytes.c b/support/support_compare_file_bytes.c
new file mode 100644
index 0000000000000000..e261e1da8f7b02b2
--- /dev/null
+++ b/support/support_compare_file_bytes.c
@@ -0,0 +1,42 @@
+/* Compare bytes from an open file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+#include <support/file_contents.h>
+
+/* Check that an already-open file has exactly the given bytes,
+ starting at the current offset. */
+
+int
+support_compare_file_bytes (FILE *fp, const char *contents, size_t length)
+{
+ int c;
+ while (length > 0)
+ {
+ c = getc (fp);
+ if (c == EOF || (unsigned char) c != (unsigned char) contents[0])
+ return 1;
+ contents++;
+ length--;
+ }
+ c = getc (fp);
+ if (c != EOF || ferror (fp))
+ return 1;
+ return 0;
+}
diff --git a/support/support_compare_file_string.c b/support/support_compare_file_string.c
new file mode 100644
index 0000000000000000..04513c3af197037d
--- /dev/null
+++ b/support/support_compare_file_string.c
@@ -0,0 +1,28 @@
+/* Compare string from an open file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <support/file_contents.h>
+
+int
+support_compare_file_string (FILE *fp, const char *contents)
+{
+ return support_compare_file_bytes (fp, contents, strlen (contents));
+}
diff --git a/support/support_open_and_compare_file_bytes.c b/support/support_open_and_compare_file_bytes.c
new file mode 100644
index 0000000000000000..f804ed8e460d82f0
--- /dev/null
+++ b/support/support_open_and_compare_file_bytes.c
@@ -0,0 +1,33 @@
+/* Compare bytes from a file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/file_contents.h>
+#include <support/xstdio.h>
+
+/* Check that a not-currently-open file has exactly the given
+ bytes. */
+
+int
+support_open_and_compare_file_bytes (const char *file, const char *contents,
+ size_t length)
+{
+ FILE *fp = xfopen (file, "r");
+ int ret = support_compare_file_bytes (fp, contents, length);
+ xfclose (fp);
+ return ret;
+}
diff --git a/support/support_open_and_compare_file_string.c b/support/support_open_and_compare_file_string.c
new file mode 100644
index 0000000000000000..2b596d4c88b697f2
--- /dev/null
+++ b/support/support_open_and_compare_file_string.c
@@ -0,0 +1,32 @@
+/* Compare string from a file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#include <support/file_contents.h>
+#include <support/xstdio.h>
+
+/* Check that a not-currently-open file has exactly the given string
+ as contents, comparing from the start of the file. */
+
+int
+support_open_and_compare_file_string (const char *file, const char *contents)
+{
+ return support_open_and_compare_file_bytes (file, contents,
+ strlen (contents));
+}

739
glibc-RHEL-115823-2.patch Normal file
View File

@ -0,0 +1,739 @@
commit ed4bb289cf739f537deb735eaa01be531df084b9
Author: Joseph Myers <josmyers@redhat.com>
Date: Wed Sep 4 16:32:21 2024 +0000
Add more thorough tests of freopen
freopen is rather minimally tested in libio/tst-freopen and
libio/test-freopen. Add some more thorough tests, covering different
cases for change of mode in particular. The tests are run for both
freopen and freopen64 (given that those functions have two separate
copies of much of the code, so any bug fix directly in the freopen
code would probably need applying in both places).
Note that there are two parts of the tests disabled because of bugs
discovered through running the tests, with bug numbers given in
comments. I expect to address those separately. The tests also don't
cover changes to cancellation ("c" in mode); I think that will better
be handled through a separate test. Also to handle separately:
testing on stdin / stdout / stderr; documenting lack of support for
streams opened with popen / fmemopen / open_memstream / fopencookie;
maybe also a chroot test without /proc; maybe also more thorough tests
for large file handling on 32-bit systems (freopen64).
Tested for x86_64.
Conflicts:
stdio-common/Makefile
(fixup context for newly added tests)
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index cee076cb7bcff2d2..f949ec37482d8859 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -224,6 +224,10 @@ tests := \
tst-fphex \
tst-fphex-wide \
tst-fread \
+ tst-freopen2 \
+ tst-freopen3 \
+ tst-freopen64-2 \
+ tst-freopen64-3 \
tst-fseek \
tst-fseek-mmap \
tst-fwrite \
@@ -331,6 +335,8 @@ tests-special += \
ifeq (yes,$(build-shared))
ifneq ($(PERL),no)
tests-special += \
+ $(objpfx)tst-freopen2-mem.out \
+ $(objpfx)tst-freopen64-2-mem.out \
$(objpfx)tst-getline-enomem-mem.out \
$(objpfx)tst-getline-mem.out \
$(objpfx)tst-printf-bz18872-mem.out \
@@ -342,6 +348,10 @@ tests-special += \
# tests-special
generated += \
+ tst-freopen2-mem.out \
+ tst-freopen2.mtrace \
+ tst-freopen64-2-mem.out \
+ tst-freopen64-2.mtrace \
tst-getline-enomem-mem.out \
tst-getline-enomem.mtrace \
tst-getline-mem.out \
@@ -461,6 +471,12 @@ tst-getline-ENV = \
tst-getline-enomem-ENV = \
MALLOC_TRACE=$(objpfx)tst-getline-enomem.mtrace \
LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen2-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen2.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen64-2-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen64-2.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
$(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc
$(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \
diff --git a/stdio-common/tst-freopen2-main.c b/stdio-common/tst-freopen2-main.c
new file mode 100644
index 0000000000000000..22b21afebf709563
--- /dev/null
+++ b/stdio-common/tst-freopen2-main.c
@@ -0,0 +1,526 @@
+/* Test freopen.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/file_contents.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+
+#define START_TEST(DESC) \
+ do \
+ { \
+ fds = support_descriptors_list (); \
+ verbose_printf (DESC); \
+ } \
+ while (0)
+
+#define END_TEST \
+ do \
+ { \
+ support_descriptors_check (fds); \
+ support_descriptors_free (fds); \
+ } \
+ while (0)
+
+int
+do_test (void)
+{
+ mtrace ();
+ struct support_descriptors *fds;
+ char *temp_dir = support_create_temp_directory ("tst-freopen2");
+ char *file1 = xasprintf ("%s/file1", temp_dir);
+ support_write_file_string (file1, "file1");
+ add_temp_file (file1);
+ char *file2 = xasprintf ("%s/file2", temp_dir);
+ support_write_file_string (file2, "file2");
+ add_temp_file (file2);
+ char *file3 = xasprintf ("%s/file3", temp_dir);
+ char *file4 = xasprintf ("%s/file4", temp_dir);
+ char *file1a = xasprintf ("%s/file1a", temp_dir);
+ FILE *fp;
+ int ret;
+ wint_t wc;
+
+ /* Test each pair of old and new modes from r w a. */
+
+ START_TEST ("Testing r -> r\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "file2");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing r -> w\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("File2new", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file1, "file1");
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "File2new");
+ END_TEST;
+
+ START_TEST ("Testing r -> a\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "a", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("3", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "File2new3");
+ END_TEST;
+
+ START_TEST ("Testing w -> r\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "File2new3");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing w -> w\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("next", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file1, "");
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "next");
+ END_TEST;
+
+ START_TEST ("Testing w -> a\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file2, "a", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("4", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "next4");
+ END_TEST;
+
+ START_TEST ("Testing a -> r\n");
+ fp = xfopen (file1, "a");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "next4");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing a -> w\n");
+ fp = xfopen (file1, "a");
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("another", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "another");
+ END_TEST;
+
+ START_TEST ("Testing a -> a\n");
+ fp = xfopen (file1, "a");
+ fp = FREOPEN (file2, "a", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("5", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "another5");
+ END_TEST;
+
+ /* Test for file originally opened with fopen64. */
+ START_TEST ("Testing fopen64 a -> a\n");
+ fp = fopen64 (file1, "a");
+ TEST_VERIFY_EXIT (fp != NULL);
+ fp = FREOPEN (file2, "a", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("64", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "another564");
+ END_TEST;
+
+ /* Test calling freopen more than once on the same FILE *. */
+
+ START_TEST ("Testing r -> w -> r\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("freopen-twice", fp);
+ TEST_VERIFY (ret >= 0);
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "freopen-twice");
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "freopen-twice");
+ END_TEST;
+
+ START_TEST ("Testing r -> w -> r (exactly one freopen64)\n");
+ fp = xfopen (file1, "r");
+ fp = OTHER_FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("freopen-twice64", fp);
+ TEST_VERIFY (ret >= 0);
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "freopen-twice64");
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "freopen-twice64");
+ END_TEST;
+
+ /* Test changing to/from b (binary, no-op). */
+
+ START_TEST ("Testing rb -> r\n");
+ fp = xfopen (file1, "rb");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "freopen-twice64");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing r -> rb\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "rb", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "freopen-twice64");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test changing to/from + (read-and-write). */
+
+ START_TEST ("Testing r -> w+\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "w+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("latest", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = fseek (fp, 0, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ TEST_COMPARE_FILE_STRING (fp, "latest");
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "latest");
+ END_TEST;
+
+ START_TEST ("Testing w -> a+\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file2, "a+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("suffix", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = fseek (fp, 0, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ TEST_COMPARE_FILE_STRING (fp, "latestsuffix");
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "latestsuffix");
+ END_TEST;
+
+ START_TEST ("Testing a -> r+\n");
+ fp = xfopen (file1, "a");
+ fp = FREOPEN (file2, "r+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "latestsuffix");
+ ret = fseek (fp, 0, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ ret = fputs ("new", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "newestsuffix");
+ END_TEST;
+
+ START_TEST ("Testing r+ -> w\n");
+ fp = xfopen (file1, "r+");
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("plusto", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = fseek (fp, 0, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ errno = 0;
+ TEST_COMPARE (fgetc (fp), EOF);
+ TEST_COMPARE (errno, EBADF);
+ clearerr (fp);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "plusto");
+ END_TEST;
+
+ START_TEST ("Testing w+ -> a\n");
+ fp = xfopen (file1, "w+");
+ fp = FREOPEN (file2, "a", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("more", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = fseek (fp, 0, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ errno = 0;
+ TEST_COMPARE (fgetc (fp), EOF);
+ TEST_COMPARE (errno, EBADF);
+ clearerr (fp);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "plustomore");
+ END_TEST;
+
+ START_TEST ("Testing a+ -> r\n");
+ fp = xfopen (file1, "a+");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "plustomore");
+ ret = fputs ("2", fp);
+ TEST_COMPARE (ret, EOF);
+ clearerr (fp);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "plustomore");
+ END_TEST;
+
+ /* Test changing to/from e (FD_CLOEXEC). */
+
+ START_TEST ("Testing re -> r\n");
+ fp = xfopen (file1, "re");
+ ret = fcntl (fileno (fp), F_GETFD);
+ TEST_VERIFY (ret != -1);
+ TEST_COMPARE (ret & FD_CLOEXEC, FD_CLOEXEC);
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fcntl (fileno (fp), F_GETFD);
+ TEST_VERIFY (ret != -1);
+#if 0 /* Fails to clear FD_CLOEXEC (bug 32134). */
+ TEST_COMPARE (ret & FD_CLOEXEC, 0);
+#endif
+ TEST_COMPARE_FILE_STRING (fp, "plustomore");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing r -> re\n");
+ fp = xfopen (file1, "r");
+ ret = fcntl (fileno (fp), F_GETFD);
+ TEST_VERIFY (ret != -1);
+ TEST_COMPARE (ret & FD_CLOEXEC, 0);
+ fp = FREOPEN (file2, "re", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fcntl (fileno (fp), F_GETFD);
+ TEST_VERIFY (ret != -1);
+ TEST_COMPARE (ret & FD_CLOEXEC, FD_CLOEXEC);
+ TEST_COMPARE_FILE_STRING (fp, "plustomore");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test changing to/from m (mmap) (a no-op as far as testing
+ semantics is concerned). */
+
+ START_TEST ("Testing rm -> r\n");
+ fp = xfopen (file1, "rm");
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "plustomore");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("Testing r -> rm\n");
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (file2, "rm", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "plustomore");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test changing to/from x (O_EXCL). */
+
+ START_TEST ("Testing wx -> w\n");
+ fp = xfopen (file3, "wx");
+ add_temp_file (file3);
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = fputs ("wxtow", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file2, "wxtow");
+ END_TEST;
+
+ START_TEST ("Testing w -> wx (file does not exist)\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file4, "wx", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ add_temp_file (file4);
+ ret = fputs ("wtowx", fp);
+ TEST_VERIFY (ret >= 0);
+ xfclose (fp);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file4, "wtowx");
+ END_TEST;
+
+ /* Test with ,ccs=CHARSET. */
+
+ START_TEST ("testing w,ccs=utf-8 -> r\n");
+ fp = xfopen (file1, "w,ccs=utf-8");
+ ret = fputws (L"\xc0\xc1", fp);
+ TEST_VERIFY (ret >= 0);
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "wxtow");
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("testing w,ccs=iso-8859-1 -> r,ccs=utf-8\n");
+ fp = xfopen (file2, "w,ccs=iso-8859-1");
+ ret = fputws (L"\xc0\xc1", fp);
+ TEST_VERIFY (ret >= 0);
+#if 0 /* Doesn't work (bug 23675). */
+ fp = FREOPEN (file1, "r,ccs=utf-8", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+#else /* Works instead. */
+ xfclose (fp);
+ fp = xfopen (file1, "r,ccs=utf-8");
+#endif
+ wc = fgetwc (fp);
+ TEST_COMPARE (wc, (wint_t) 0xc0);
+ wc = fgetwc (fp);
+ TEST_COMPARE (wc, (wint_t) 0xc1);
+ wc = fgetwc (fp);
+ TEST_COMPARE (wc, WEOF);
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("testing r,ccs=utf-8 -> r\n");
+ fp = xfopen (file1, "r,ccs=utf-8");
+ fp = FREOPEN (file1, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "\u00c0\u00c1");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test that errors closing the old file are ignored. */
+
+ START_TEST ("testing errors closing old file ignored\n");
+ fp = xfopen ("/dev/full", "w");
+ fputc ('x', fp);
+ fp = FREOPEN (file1, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "\u00c0\u00c1");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test that error / EOF state from the old file are cleared. */
+
+ START_TEST ("testing error state from old file cleared\n");
+ fp = xfopen ("/dev/full", "w");
+ fputc ('x', fp);
+ fflush (fp);
+ TEST_VERIFY (ferror (fp));
+ TEST_VERIFY (!feof (fp));
+ fp = FREOPEN (file2, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_VERIFY (!ferror (fp));
+ TEST_VERIFY (!feof (fp));
+ xfclose (fp);
+ END_TEST;
+
+ START_TEST ("testing EOF state from old file cleared\n");
+ fp = xfopen ("/dev/null", "r");
+ fgetc (fp);
+ TEST_VERIFY (!ferror (fp));
+ TEST_VERIFY (feof (fp));
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_VERIFY (!ferror (fp));
+ TEST_VERIFY (!feof (fp));
+ xfclose (fp);
+ END_TEST;
+
+ /* Test freopen with NULL, same mode (should flush content and reset
+ file offset). */
+
+ START_TEST ("testing freopen with NULL, same mode\n");
+ fp = xfopen (file1, "r+");
+ ret = fputs ("same mode", fp);
+ TEST_VERIFY (ret >= 0);
+ fp = FREOPEN (NULL, "r+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "same mode");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test freopen with NULL, different mode. */
+
+ START_TEST ("testing freopen with NULL, different mode\n");
+ fp = xfopen (file1, "w");
+ ret = fputs ("different mode", fp);
+ TEST_VERIFY (ret >= 0);
+ fp = FREOPEN (NULL, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "different mode");
+ xfclose (fp);
+ END_TEST;
+
+ /* Test freopen with NULL, renamed file. This verifies that
+ reopening succeeds (and resets the file position indicator to
+ start of file) even when the original path could no longer be
+ opened. */
+
+ START_TEST ("testing freopen with NULL, renamed file\n");
+ fp = xfopen (file1, "r+");
+ ret = fputs ("file has been renamed", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = rename (file1, file1a);
+ TEST_COMPARE (ret, 0);
+ fp = FREOPEN (NULL, "r+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "file has been renamed");
+ xfclose (fp);
+ ret = rename (file1a, file1);
+ TEST_COMPARE (ret, 0);
+ END_TEST;
+
+ /* Test freopen with NULL, deleted file. This verifies that
+ reopening succeeds (and resets the file position indicator to
+ start of file) even when the original path could no longer be
+ opened. */
+
+ START_TEST ("testing freopen with NULL, deleted file\n");
+ fp = xfopen (file1, "r+");
+ ret = fputs ("file has now been deleted", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = remove (file1);
+ TEST_COMPARE (ret, 0);
+ fp = FREOPEN (NULL, "r+", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ TEST_COMPARE_FILE_STRING (fp, "file has now been deleted");
+ xfclose (fp);
+ /* Recreate the file so it is present when expected for temporary
+ file deletion. */
+ support_write_file_string (file1, "file1");
+ END_TEST;
+
+ free (temp_dir);
+ free (file1);
+ free (file2);
+ free (file3);
+ free (file4);
+ free (file1a);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen2.c b/stdio-common/tst-freopen2.c
new file mode 100644
index 0000000000000000..11ec7a9783b7caa3
--- /dev/null
+++ b/stdio-common/tst-freopen2.c
@@ -0,0 +1,3 @@
+#define FREOPEN freopen
+#define OTHER_FREOPEN freopen64
+#include <tst-freopen2-main.c>
diff --git a/stdio-common/tst-freopen3-main.c b/stdio-common/tst-freopen3-main.c
new file mode 100644
index 0000000000000000..5107e1f98e189e4b
--- /dev/null
+++ b/stdio-common/tst-freopen3-main.c
@@ -0,0 +1,90 @@
+/* Test freopen failure.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/file_contents.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+
+#define START_TEST(DESC) \
+ do \
+ { \
+ fds = support_descriptors_list (); \
+ verbose_printf (DESC); \
+ } \
+ while (0)
+
+#define END_TEST \
+ do \
+ { \
+ support_descriptors_check (fds); \
+ support_descriptors_free (fds); \
+ } \
+ while (0)
+
+int
+do_test (void)
+{
+ struct support_descriptors *fds;
+ char *temp_dir = support_create_temp_directory ("tst-freopen3");
+ char *file1 = xasprintf ("%s/file1", temp_dir);
+ support_write_file_string (file1, "file1");
+ add_temp_file (file1);
+ char *file2 = xasprintf ("%s/file2", temp_dir);
+ support_write_file_string (file2, "file2");
+ add_temp_file (file2);
+ char *file_nodir = xasprintf ("%s/nodir/file", temp_dir);
+ FILE *fp;
+ int ret;
+ int fd;
+
+ START_TEST ("Testing w -> wx (file exists)\n");
+ fp = xfopen (file1, "w");
+ fp = FREOPEN (file2, "wx", fp);
+ TEST_VERIFY (fp == NULL);
+ END_TEST;
+
+ /* Test old file is closed even when opening the new file fails. */
+
+ START_TEST ("testing r -> r (opening new file fails)\n");
+ fp = xfopen (file1, "r");
+ fd = fileno (fp);
+ fp = FREOPEN (file_nodir, "r", fp);
+ TEST_VERIFY (fp == NULL);
+ errno = 0;
+ ret = fcntl (fd, F_GETFL);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EBADF);
+ END_TEST;
+
+ free (temp_dir);
+ free (file1);
+ free (file2);
+ free (file_nodir);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen3.c b/stdio-common/tst-freopen3.c
new file mode 100644
index 0000000000000000..5041b6b2332c8af1
--- /dev/null
+++ b/stdio-common/tst-freopen3.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen
+#include <tst-freopen3-main.c>
diff --git a/stdio-common/tst-freopen64-2.c b/stdio-common/tst-freopen64-2.c
new file mode 100644
index 0000000000000000..88fdc64d8c6548f5
--- /dev/null
+++ b/stdio-common/tst-freopen64-2.c
@@ -0,0 +1,3 @@
+#define FREOPEN freopen64
+#define OTHER_FREOPEN freopen
+#include <tst-freopen2-main.c>
diff --git a/stdio-common/tst-freopen64-3.c b/stdio-common/tst-freopen64-3.c
new file mode 100644
index 0000000000000000..b91b6d2c033a1a79
--- /dev/null
+++ b/stdio-common/tst-freopen64-3.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen64
+#include <tst-freopen3-main.c>

69
glibc-RHEL-115823-3.patch Normal file
View File

@ -0,0 +1,69 @@
commit f512634ddef242ef0ff025ddeba64ce51035040f
Author: Joseph Myers <josmyers@redhat.com>
Date: Thu Sep 5 11:15:29 2024 +0000
Clear flags2 flags set from mode in freopen (bug 32134)
As reported in bug 32134, freopen does not clear the flags set in
fp->_flags2 by the "e", "m" or "c" mode characters. Clear these so
that they can be set or not as appropriate from the mode string passed
to freopen. The relevant test for "e" in tst-freopen2-main.c is
enabled accordingly; "c" is expected to be covered in a separately
written test (and while tst-freopen2-main.c does include transitions
to and from "m", that's not really a semantic flag intended to result
in behaving in an observably different way).
Tested for x86_64.
diff --git a/libio/freopen.c b/libio/freopen.c
index c7e36db7758c8f3b..f6c943ddf82e399c 100644
--- a/libio/freopen.c
+++ b/libio/freopen.c
@@ -63,6 +63,9 @@ freopen (const char *filename, const char *mode, FILE *fp)
up here. */
_IO_old_file_close_it (fp);
_IO_JUMPS_FUNC_UPDATE (fp, &_IO_old_file_jumps);
+ fp->_flags2 &= ~(_IO_FLAGS2_MMAP
+ | _IO_FLAGS2_NOTCANCEL
+ | _IO_FLAGS2_CLOEXEC);
result = _IO_old_file_fopen (fp, gfilename, mode);
}
else
@@ -72,6 +75,9 @@ freopen (const char *filename, const char *mode, FILE *fp)
_IO_JUMPS_FILE_plus (fp) = &_IO_file_jumps;
if (_IO_vtable_offset (fp) == 0 && fp->_wide_data != NULL)
fp->_wide_data->_wide_vtable = &_IO_wfile_jumps;
+ fp->_flags2 &= ~(_IO_FLAGS2_MMAP
+ | _IO_FLAGS2_NOTCANCEL
+ | _IO_FLAGS2_CLOEXEC);
result = _IO_file_fopen (fp, gfilename, mode, 1);
if (result != NULL)
result = __fopen_maybe_mmap (result);
diff --git a/libio/freopen64.c b/libio/freopen64.c
index 9a6d5ed8016b6ed6..0f3cb16331318425 100644
--- a/libio/freopen64.c
+++ b/libio/freopen64.c
@@ -56,6 +56,9 @@ freopen64 (const char *filename, const char *mode, FILE *fp)
_IO_JUMPS_FILE_plus (fp) = &_IO_file_jumps;
if (_IO_vtable_offset (fp) == 0 && fp->_wide_data != NULL)
fp->_wide_data->_wide_vtable = &_IO_wfile_jumps;
+ fp->_flags2 &= ~(_IO_FLAGS2_MMAP
+ | _IO_FLAGS2_NOTCANCEL
+ | _IO_FLAGS2_CLOEXEC);
result = _IO_file_fopen (fp, gfilename, mode, 0);
fp->_flags2 &= ~_IO_FLAGS2_NOCLOSE;
if (result != NULL)
diff --git a/stdio-common/tst-freopen2-main.c b/stdio-common/tst-freopen2-main.c
index 22b21afebf709563..5dad41c76b02e6de 100644
--- a/stdio-common/tst-freopen2-main.c
+++ b/stdio-common/tst-freopen2-main.c
@@ -308,9 +308,7 @@ do_test (void)
TEST_VERIFY_EXIT (fp != NULL);
ret = fcntl (fileno (fp), F_GETFD);
TEST_VERIFY (ret != -1);
-#if 0 /* Fails to clear FD_CLOEXEC (bug 32134). */
TEST_COMPARE (ret & FD_CLOEXEC, 0);
-#endif
TEST_COMPARE_FILE_STRING (fp, "plustomore");
xfclose (fp);
END_TEST;

119
glibc-RHEL-115823-4.patch Normal file
View File

@ -0,0 +1,119 @@
commit 9c0d6f7a1046aba111e25e34ec07242853e859dc
Author: Joseph Myers <josmyers@redhat.com>
Date: Thu Sep 5 11:16:59 2024 +0000
Fix memory leak on freopen error return (bug 32140)
As reported in bug 32140, freopen leaks the FILE object when it
returns NULL: there is no valid use of the FILE * pointer (including
passing to freopen again or to fclose) after such an error return, so
the underlying object should be freed. Add code to free it.
Note 1: while I think it's clear from the relevant standards that the
object should be freed and the FILE * can't be used after the call in
this case (the stream is closed, which ends the lifetime of the FILE),
it's entirely possible that some existing code does in fact try to use
the existing FILE * in some way and could be broken by this change.
(Though the most common case for freopen may be stdin / stdout /
stderr, which _IO_deallocate_file explicitly checks for and does not
deallocate.)
Note 2: the deallocation is only done in the _IO_IS_FILEBUF case.
Other kinds of streams bypass all the freopen logic handling closing
the file, meaning a call to _IO_deallocate_file would neither be safe
(the FILE might still be linked into the list of all open FILEs) nor
sufficient (other internal memory allocations associated with the file
would not have been freed). I think the validity of freopen for any
other kind of stream will need clarifying with the Austin Group, but
if it is valid in any such case (where "valid" means "not undefined
behavior so required to close the stream" rather than "required to
successfully associate the stream with the new file in cases where
fopen would work"), more significant changes would be needed to ensure
the stream gets fully closed.
Tested for x86_64.
diff --git a/libio/freopen.c b/libio/freopen.c
index f6c943ddf82e399c..ceeff8f2acb6333f 100644
--- a/libio/freopen.c
+++ b/libio/freopen.c
@@ -114,5 +114,7 @@ freopen (const char *filename, const char *mode, FILE *fp)
end:
_IO_release_lock (fp);
+ if (result == NULL && (fp->_flags & _IO_IS_FILEBUF) != 0)
+ _IO_deallocate_file (fp);
return result;
}
diff --git a/libio/freopen64.c b/libio/freopen64.c
index 0f3cb16331318425..3a314aca5ce808ca 100644
--- a/libio/freopen64.c
+++ b/libio/freopen64.c
@@ -94,5 +94,7 @@ freopen64 (const char *filename, const char *mode, FILE *fp)
end:
_IO_release_lock (fp);
+ if (result == NULL && (fp->_flags & _IO_IS_FILEBUF) != 0)
+ _IO_deallocate_file (fp);
return result;
}
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index f949ec37482d8859..ea6212b4eacd07d1 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -336,7 +336,9 @@ ifeq (yes,$(build-shared))
ifneq ($(PERL),no)
tests-special += \
$(objpfx)tst-freopen2-mem.out \
+ $(objpfx)tst-freopen3-mem.out \
$(objpfx)tst-freopen64-2-mem.out \
+ $(objpfx)tst-freopen64-3-mem.out \
$(objpfx)tst-getline-enomem-mem.out \
$(objpfx)tst-getline-mem.out \
$(objpfx)tst-printf-bz18872-mem.out \
@@ -350,8 +352,12 @@ tests-special += \
generated += \
tst-freopen2-mem.out \
tst-freopen2.mtrace \
+ tst-freopen3-mem.out \
+ tst-freopen3.mtrace \
tst-freopen64-2-mem.out \
tst-freopen64-2.mtrace \
+ tst-freopen64-3-mem.out \
+ tst-freopen64-3.mtrace \
tst-getline-enomem-mem.out \
tst-getline-enomem.mtrace \
tst-getline-mem.out \
@@ -477,6 +483,12 @@ tst-freopen2-ENV = \
tst-freopen64-2-ENV = \
MALLOC_TRACE=$(objpfx)tst-freopen64-2.mtrace \
LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen3-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen3.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen64-3-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen64-3.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
$(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc
$(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \
diff --git a/stdio-common/tst-freopen3-main.c b/stdio-common/tst-freopen3-main.c
index 5107e1f98e189e4b..990a6e5921843793 100644
--- a/stdio-common/tst-freopen3-main.c
+++ b/stdio-common/tst-freopen3-main.c
@@ -18,6 +18,7 @@
#include <errno.h>
#include <fcntl.h>
+#include <mcheck.h>
#include <stdio.h>
#include <stdlib.h>
@@ -48,6 +49,7 @@
int
do_test (void)
{
+ mtrace ();
struct support_descriptors *fds;
char *temp_dir = support_create_temp_directory ("tst-freopen3");
char *file1 = xasprintf ("%s/file1", temp_dir);

86
glibc-RHEL-115823-5.patch Normal file
View File

@ -0,0 +1,86 @@
commit e44ca1c085b3bd41266c882ea1cb0fd436231635
Author: Joseph Myers <josmyers@redhat.com>
Date: Thu Sep 5 20:08:10 2024 +0000
Fix freopen handling of ,ccs= (bug 23675)
As reported in bug 23675 and shown up in the recently added tests of
different cases of freopen (relevant part of the test currently
conditioned under #if 0 to avoid a failure resulting from this bug),
freopen wrongly forces the stream to unoriented even when a mode with
,ccs= is specified, though such a mode is supposed to result in a
wide-oriented stream. Move the clearing of _mode to before the actual
reopening occurs, so that the main fopen implementation can leave a
wide-oriented stream in the ,ccs= case.
Tested for x86_64.
diff --git a/libio/freopen.c b/libio/freopen.c
index ceeff8f2acb6333f..00f26ad578afc1b1 100644
--- a/libio/freopen.c
+++ b/libio/freopen.c
@@ -66,6 +66,7 @@ freopen (const char *filename, const char *mode, FILE *fp)
fp->_flags2 &= ~(_IO_FLAGS2_MMAP
| _IO_FLAGS2_NOTCANCEL
| _IO_FLAGS2_CLOEXEC);
+ fp->_mode = 0;
result = _IO_old_file_fopen (fp, gfilename, mode);
}
else
@@ -78,6 +79,7 @@ freopen (const char *filename, const char *mode, FILE *fp)
fp->_flags2 &= ~(_IO_FLAGS2_MMAP
| _IO_FLAGS2_NOTCANCEL
| _IO_FLAGS2_CLOEXEC);
+ fp->_mode = 0;
result = _IO_file_fopen (fp, gfilename, mode, 1);
if (result != NULL)
result = __fopen_maybe_mmap (result);
@@ -85,9 +87,6 @@ freopen (const char *filename, const char *mode, FILE *fp)
fp->_flags2 &= ~_IO_FLAGS2_NOCLOSE;
if (result != NULL)
{
- /* unbound stream orientation */
- result->_mode = 0;
-
if (fd != -1 && _IO_fileno (result) != fd)
{
/* At this point we have both file descriptors already allocated,
diff --git a/libio/freopen64.c b/libio/freopen64.c
index 3a314aca5ce808ca..77c0dd3fdf3c3494 100644
--- a/libio/freopen64.c
+++ b/libio/freopen64.c
@@ -59,15 +59,13 @@ freopen64 (const char *filename, const char *mode, FILE *fp)
fp->_flags2 &= ~(_IO_FLAGS2_MMAP
| _IO_FLAGS2_NOTCANCEL
| _IO_FLAGS2_CLOEXEC);
+ fp->_mode = 0;
result = _IO_file_fopen (fp, gfilename, mode, 0);
fp->_flags2 &= ~_IO_FLAGS2_NOCLOSE;
if (result != NULL)
result = __fopen_maybe_mmap (result);
if (result != NULL)
{
- /* unbound stream orientation */
- result->_mode = 0;
-
if (fd != -1 && _IO_fileno (result) != fd)
{
/* At this point we have both file descriptors already allocated,
diff --git a/stdio-common/tst-freopen2-main.c b/stdio-common/tst-freopen2-main.c
index 5dad41c76b02e6de..74c3125fca697fe3 100644
--- a/stdio-common/tst-freopen2-main.c
+++ b/stdio-common/tst-freopen2-main.c
@@ -386,13 +386,8 @@ do_test (void)
fp = xfopen (file2, "w,ccs=iso-8859-1");
ret = fputws (L"\xc0\xc1", fp);
TEST_VERIFY (ret >= 0);
-#if 0 /* Doesn't work (bug 23675). */
fp = FREOPEN (file1, "r,ccs=utf-8", fp);
TEST_VERIFY_EXIT (fp != NULL);
-#else /* Works instead. */
- xfclose (fp);
- fp = xfopen (file1, "r,ccs=utf-8");
-#endif
wc = fgetwc (fp);
TEST_COMPARE (wc, (wint_t) 0xc0);
wc = fgetwc (fp);

33
glibc-RHEL-115823-6.patch Normal file
View File

@ -0,0 +1,33 @@
commit a2509a8bc955988f01f389a1cf74db3a9da42409
Author: Joseph Myers <josmyers@redhat.com>
Date: Fri Sep 6 20:38:23 2024 +0000
Document limitations on streams passed to freopen
As recently discussed, document that freopen does not work with
streams opened with functions such as popen, fmemopen, open_memstream
or fopencookie. I've filed
<https://austingroupbugs.net/view.php?id=1855> to clarify this issue
in POSIX.
Tested with "make info" and "make html".
diff --git a/manual/stdio.texi b/manual/stdio.texi
index de174eafcd5fde0c..7d787735079ea964 100644
--- a/manual/stdio.texi
+++ b/manual/stdio.texi
@@ -330,6 +330,14 @@ this ability, so using @code{freopen} is more portable.
When the sources are compiled with @code{_FILE_OFFSET_BITS == 64} on a
32 bit machine this function is in fact @code{freopen64} since the LFS
interface replaces transparently the old interface.
+
+@Theglibc{} only supports use of @code{freopen} on streams opened with
+@code{fopen} or @code{fopen64} and on the original values of the
+standard streams @code{stdin}, @code{stdout}, and @code{stderr}; such
+a stream may be reopened multiple times with @code{freopen}. If it is
+called on another kind of stream (opened with functions such as
+@code{popen}, @code{fmemopen}, @code{open_memstream}, and
+@code{fopencookie}), @code{freopen} fails and returns a null pointer.
@end deftypefun
@deftypefun {FILE *} freopen64 (const char *@var{filename}, const char *@var{opentype}, FILE *@var{stream})

495
glibc-RHEL-115823-7.patch Normal file
View File

@ -0,0 +1,495 @@
commit e0f3bf10acf4aab27752847828bfecd3fce41190
Author: Joseph Myers <josmyers@redhat.com>
Date: Fri Sep 20 23:26:31 2024 +0000
Add freopen special-case tests: chroot, EFBIG, stdin/stdout/stderr
Add tests of special cases for freopen that were omitted from the more
general tests of different modes and similar issues. The special
cases in the three tests here are logically unconnected; it was simply
convenient to put these tests in one patch.
* Test freopen with a NULL path to the new file, in a chroot. Rather
than asserting that this fails (logically, failure in this case is
an implementation detail; it's not required for freopen to rely on
/proc), verify that either it fails (without memory leaks) or that
it succeeds and behaves as expected on success. There is no check
for file descriptor leaks because the machinery for that also
depends on /proc, so can't be used in a chroot.
* Test that freopen and freopen64 are genuinely different in
configurations with 32-bit off_t by checking for an EFBIG trying to
write past 2GB in a file opened with freopen in such a configuration
but no error with 64-bit off_t or when opening with freopen64.
* Test freopen of stdin, stdout and stderr.
Tested for x86_64 and x86.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index ea6212b4eacd07d1..13ad32289e6a9f50 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -226,8 +226,13 @@ tests := \
tst-fread \
tst-freopen2 \
tst-freopen3 \
+ tst-freopen4 \
+ tst-freopen5 \
+ tst-freopen6 \
tst-freopen64-2 \
tst-freopen64-3 \
+ tst-freopen64-4 \
+ tst-freopen64-6 \
tst-fseek \
tst-fseek-mmap \
tst-fwrite \
@@ -337,8 +342,13 @@ ifneq ($(PERL),no)
tests-special += \
$(objpfx)tst-freopen2-mem.out \
$(objpfx)tst-freopen3-mem.out \
+ $(objpfx)tst-freopen4-mem.out \
+ $(objpfx)tst-freopen5-mem.out \
+ $(objpfx)tst-freopen6-mem.out \
$(objpfx)tst-freopen64-2-mem.out \
$(objpfx)tst-freopen64-3-mem.out \
+ $(objpfx)tst-freopen64-4-mem.out \
+ $(objpfx)tst-freopen64-6-mem.out \
$(objpfx)tst-getline-enomem-mem.out \
$(objpfx)tst-getline-mem.out \
$(objpfx)tst-printf-bz18872-mem.out \
@@ -354,10 +364,20 @@ generated += \
tst-freopen2.mtrace \
tst-freopen3-mem.out \
tst-freopen3.mtrace \
+ tst-freopen4-mem.out \
+ tst-freopen4.mtrace \
+ tst-freopen5-mem.out \
+ tst-freopen5.mtrace \
+ tst-freopen6-mem.out \
+ tst-freopen6.mtrace \
tst-freopen64-2-mem.out \
tst-freopen64-2.mtrace \
tst-freopen64-3-mem.out \
tst-freopen64-3.mtrace \
+ tst-freopen64-4-mem.out \
+ tst-freopen64-4.mtrace \
+ tst-freopen64-6-mem.out \
+ tst-freopen64-6.mtrace \
tst-getline-enomem-mem.out \
tst-getline-enomem.mtrace \
tst-getline-mem.out \
@@ -489,6 +509,21 @@ tst-freopen3-ENV = \
tst-freopen64-3-ENV = \
MALLOC_TRACE=$(objpfx)tst-freopen64-3.mtrace \
LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen4-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen4.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen64-4-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen64-4.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen5-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen5.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen6-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen6.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
+tst-freopen64-6-ENV = \
+ MALLOC_TRACE=$(objpfx)tst-freopen64-6.mtrace \
+ LD_PRELOAD=$(common-objpfx)malloc/libc_malloc_debug.so
$(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc
$(SHELL) $< $(common-objpfx) '$(test-program-prefix)'; \
diff --git a/stdio-common/tst-freopen4-main.c b/stdio-common/tst-freopen4-main.c
new file mode 100644
index 0000000000000000..e169442cf4df2e9d
--- /dev/null
+++ b/stdio-common/tst-freopen4-main.c
@@ -0,0 +1,100 @@
+/* Test freopen in chroot.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/check.h>
+#include <support/file_contents.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+#include <support/xunistd.h>
+
+int
+do_test (void)
+{
+ mtrace ();
+ char *temp_dir = support_create_temp_directory ("tst-freopen4");
+ FILE *fp;
+ int ret;
+
+ /* These chroot tests verify that either reopening a renamed or
+ deleted file works even in the absence of /proc, or that it fails
+ (without memory leaks); thus, for example, such reopening does
+ not crash in the absence of /proc. */
+
+ support_become_root ();
+ if (!support_can_chroot ())
+ return EXIT_UNSUPPORTED;
+ xchroot (temp_dir);
+
+ /* Test freopen with NULL, renamed file. This verifies that
+ reopening succeeds (and resets the file position indicator to
+ start of file) even when the original path could no longer be
+ opened, or fails without a memory leak. (It is not possible to
+ use <support/descriptors.h> to test for file descriptor leaks
+ here, because that also depends on /proc.) */
+
+ verbose_printf ("testing freopen with NULL, renamed file\n");
+ fp = xfopen ("/file1", "w+");
+ ret = fputs ("file has been renamed", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = rename ("/file1", "/file1a"); /* Old path is now gone. */
+ TEST_COMPARE (ret, 0);
+ fp = FREOPEN (NULL, "r+", fp);
+ if (fp != NULL)
+ {
+ puts ("freopen of renamed file succeeded");
+ TEST_COMPARE_FILE_STRING (fp, "file has been renamed");
+ xfclose (fp);
+ }
+ else
+ puts ("freopen of renamed file failed (OK)");
+ ret = rename ("/file1a", "/file1"); /* Restore name for next test. */
+ TEST_COMPARE (ret, 0);
+
+ /* Test freopen with NULL, deleted file. This verifies that
+ reopening succeeds (and resets the file position indicator to
+ start of file) even when the original path could no longer be
+ opened, or fails without a memory leak. */
+
+ verbose_printf ("testing freopen with NULL, deleted file\n");
+ fp = xfopen ("/file1", "r+");
+ ret = fputs ("file has now been deleted", fp);
+ TEST_VERIFY (ret >= 0);
+ ret = remove ("/file1"); /* Path is gone; fp is still open. */
+ TEST_COMPARE (ret, 0);
+ fp = FREOPEN (NULL, "r+", fp);
+ if (fp != NULL)
+ {
+ puts ("freopen of deleted file succeeded");
+ TEST_COMPARE_FILE_STRING (fp, "file has now been deleted");
+ xfclose (fp);
+ }
+ else
+ puts ("freopen of deleted file failed (OK)");
+
+ free (temp_dir);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen4.c b/stdio-common/tst-freopen4.c
new file mode 100644
index 0000000000000000..f39ec0d21730879f
--- /dev/null
+++ b/stdio-common/tst-freopen4.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen
+#include <tst-freopen4-main.c>
diff --git a/stdio-common/tst-freopen5.c b/stdio-common/tst-freopen5.c
new file mode 100644
index 0000000000000000..f32626bccfe5c10a
--- /dev/null
+++ b/stdio-common/tst-freopen5.c
@@ -0,0 +1,144 @@
+/* Test freopen and freopen64 with large offsets.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+
+#define START_TEST(DESC) \
+ do \
+ { \
+ fds = support_descriptors_list (); /* Uses the caller's 'fds'. */ \
+ verbose_printf (DESC); /* DESC is used as the format string. */ \
+ } \
+ while (0)
+
+#define END_TEST \
+ do \
+ { \
+ support_descriptors_check (fds); /* Presumably the fd leak check. */ \
+ support_descriptors_free (fds); /* Releases the caller's 'fds'. */ \
+ } \
+ while (0)
+
+int
+do_test (void)
+{
+ mtrace ();
+ struct support_descriptors *fds;
+ FILE *fp;
+ int ret;
+
+ char *temp_dir = support_create_temp_directory ("tst-freopen5");
+ /* This file is removed at the end of each test rather than left
+ around between tests to avoid problems with subsequent tests
+ reopening it as a large (4GB + 1 byte) file. */
+ char *file1 = xasprintf ("%s/file1", temp_dir);
+
+ /* fopen with freopen64: large offsets OK. */
+ START_TEST ("testing fopen with freopen64\n");
+ fp = fopen ("/dev/null", "r");
+ TEST_VERIFY_EXIT (fp != NULL);
+ fp = freopen64 (file1, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ setbuf (fp, NULL); /* Unbuffered: fputc writes at once. */
+ ret = fseeko64 (fp, 1LL << 32, SEEK_SET); /* 4 GiB offset. */
+ TEST_COMPARE (ret, 0);
+ ret = fputc ('x', fp);
+ TEST_COMPARE (ret, 'x');
+ xfclose (fp);
+ ret = remove (file1);
+ TEST_COMPARE (ret, 0);
+ END_TEST;
+
+ /* fopen64 with freopen64: large offsets OK. */
+ START_TEST ("testing fopen64 with freopen64\n");
+ fp = fopen64 ("/dev/null", "r");
+ TEST_VERIFY_EXIT (fp != NULL);
+ fp = freopen64 (file1, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ setbuf (fp, NULL);
+ ret = fseeko64 (fp, 1LL << 32, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ ret = fputc ('x', fp);
+ TEST_COMPARE (ret, 'x');
+ xfclose (fp);
+ ret = remove (file1);
+ TEST_COMPARE (ret, 0);
+ END_TEST;
+
+ /* fopen with freopen: large offsets not OK on 32-bit systems. */
+ START_TEST ("testing fopen with freopen\n");
+ fp = fopen ("/dev/null", "r");
+ TEST_VERIFY_EXIT (fp != NULL);
+ fp = freopen (file1, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ setbuf (fp, NULL);
+ ret = fseeko64 (fp, 1LL << 32, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ errno = 0; /* So the EFBIG check below sees only fputc's error. */
+ ret = fputc ('x', fp);
+ if (sizeof (off_t) == 4) /* 32-bit off_t. */
+ {
+ TEST_COMPARE (ret, EOF);
+ TEST_COMPARE (errno, EFBIG);
+ }
+ else
+ TEST_COMPARE (ret, 'x');
+ fclose (fp); /* NOTE(review): unchecked, unlike xfclose above -- confirm. */
+ ret = remove (file1);
+ TEST_COMPARE (ret, 0);
+ END_TEST;
+
+ /* fopen64 with freopen: large offsets not OK on 32-bit systems. */
+ START_TEST ("testing fopen64 with freopen\n");
+ fp = fopen64 ("/dev/null", "r");
+ TEST_VERIFY_EXIT (fp != NULL);
+ fp = freopen (file1, "w", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ setbuf (fp, NULL);
+ ret = fseeko64 (fp, 1LL << 32, SEEK_SET);
+ TEST_COMPARE (ret, 0);
+ errno = 0;
+ ret = fputc ('x', fp);
+ if (sizeof (off_t) == 4)
+ {
+ TEST_COMPARE (ret, EOF);
+ TEST_COMPARE (errno, EFBIG);
+ }
+ else
+ TEST_COMPARE (ret, 'x');
+ fclose (fp); /* NOTE(review): unchecked here as well. */
+ ret = remove (file1);
+ TEST_COMPARE (ret, 0);
+ END_TEST;
+
+ free (temp_dir);
+ free (file1);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen6-main.c b/stdio-common/tst-freopen6-main.c
new file mode 100644
index 0000000000000000..f493f42fd7486b72
--- /dev/null
+++ b/stdio-common/tst-freopen6-main.c
@@ -0,0 +1,98 @@
+/* Test freopen of stdin / stdout / stderr.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <mcheck.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <support/check.h>
+#include <support/file_contents.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+
+int
+do_test (void)
+{
+ mtrace ();
+ char *temp_dir = support_create_temp_directory ("tst-freopen6");
+ char *file1 = xasprintf ("%s/file1", temp_dir);
+ support_write_file_string (file1, "file1");
+ add_temp_file (file1);
+ FILE *fp;
+ int ret;
+
+ verbose_printf ("Testing reopening stdin\n");
+ fp = FREOPEN (file1, "r", stdin);
+ TEST_VERIFY_EXIT (fp == stdin);
+ ret = getchar (); /* Reads from file1 through the reopened stdin. */
+ TEST_COMPARE (ret, 'f');
+ ret = getchar ();
+ TEST_COMPARE (ret, 'i');
+ ret = getchar ();
+ TEST_COMPARE (ret, 'l');
+ ret = getchar ();
+ TEST_COMPARE (ret, 'e');
+ ret = getchar ();
+ TEST_COMPARE (ret, '1');
+ ret = getchar ();
+ TEST_COMPARE (ret, EOF);
+ xfclose (fp);
+
+ verbose_printf ("Testing reopening stderr\n");
+ fp = FREOPEN (file1, "w+", stderr);
+ TEST_VERIFY_EXIT (fp == stderr);
+ errno = EINVAL;
+ perror ("test");
+ ret = fseek (fp, 0, SEEK_SET); /* Rewind to read back perror's output. */
+ TEST_COMPARE (ret, 0);
+ TEST_COMPARE_FILE_STRING (fp, "test: Invalid argument\n");
+ xfclose (fp);
+
+ verbose_printf ("Testing reopening stdout\n");
+ /* Defer checks until the old stdout has been restored, so that any
+ errors go to the old stdout rather than the temporary file. FP is
+ closed only if FREOPEN succeeded; xfclose must not be given NULL. */
+ int old_stdout = dup (STDOUT_FILENO);
+ TEST_VERIFY_EXIT (old_stdout != -1);
+ int ret_fseek = 0;
+ int ret_compare = 0;
+ fp = FREOPEN (file1, "w+", stdout);
+ int fp_eq_stdout = fp == stdout;
+ if (fp != NULL)
+ {
+ printf ("reopened\n");
+ ret_fseek = fseek (fp, 0, SEEK_SET);
+ ret_compare = support_compare_file_string (fp, "reopened\n");
+ xfclose (fp);
+ }
+ stdout = fdopen (old_stdout, "w"); /* Restore stdout from the dup. */
+ TEST_VERIFY (fp_eq_stdout);
+ TEST_COMPARE (ret_fseek, 0);
+ TEST_COMPARE (ret_compare, 0);
+ xfclose (stdout);
+
+ free (temp_dir);
+ free (file1);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen6.c b/stdio-common/tst-freopen6.c
new file mode 100644
index 0000000000000000..8fd6957b54fa9bc2
--- /dev/null
+++ b/stdio-common/tst-freopen6.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen
+#include <tst-freopen6-main.c>
diff --git a/stdio-common/tst-freopen64-4.c b/stdio-common/tst-freopen64-4.c
new file mode 100644
index 0000000000000000..1411be2bfa0105c1
--- /dev/null
+++ b/stdio-common/tst-freopen64-4.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen64
+#include <tst-freopen4-main.c>
diff --git a/stdio-common/tst-freopen64-6.c b/stdio-common/tst-freopen64-6.c
new file mode 100644
index 0000000000000000..3ec509a36c2471f6
--- /dev/null
+++ b/stdio-common/tst-freopen64-6.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen64
+#include <tst-freopen6-main.c>

30
glibc-RHEL-115823-8.patch Normal file
View File

@ -0,0 +1,30 @@
commit 6948ee4edf0c57c556f8d5f394d9191216d05780
Author: Florian Weimer <fweimer@redhat.com>
Date: Sat Sep 28 21:06:11 2024 +0200
stdio-common: Fix memory leak in tst-freopen4* tests on UNSUPPORTED
The temp_dir allocation leaks if support_can_chroot returns false.
diff --git a/stdio-common/tst-freopen4-main.c b/stdio-common/tst-freopen4-main.c
index e169442cf4df2e9d..7284677a97e10af6 100644
--- a/stdio-common/tst-freopen4-main.c
+++ b/stdio-common/tst-freopen4-main.c
@@ -33,7 +33,7 @@ int
do_test (void)
{
mtrace ();
- char *temp_dir = support_create_temp_directory ("tst-freopen4");
+ char *temp_dir;
FILE *fp;
int ret;
@@ -45,6 +45,8 @@ do_test (void)
support_become_root ();
if (!support_can_chroot ())
return EXIT_UNSUPPORTED;
+
+ temp_dir = support_create_temp_directory ("tst-freopen4");
xchroot (temp_dir);
/* Test freopen with NULL, renamed file. This verifies that

216
glibc-RHEL-115823-9.patch Normal file
View File

@ -0,0 +1,216 @@
commit 42c810c2cf3554afbdd60885b7da6bb4e702466f
Author: Joseph Myers <josmyers@redhat.com>
Date: Mon Oct 7 19:44:25 2024 +0000
Add freopen special-case tests: thread cancellation
Add tests of freopen adding or removing "c" (non-cancelling I/O) from
the mode string (so completing my planned tests of freopen with
different features used in the mode strings). Note that it's in the
nature of the uncertain time at which cancellation might act (possibly
during freopen, possibly during subsequent reads) that these can leak
memory or file descriptors, so these do not include leak tests.
Tested for x86_64.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 13ad32289e6a9f50..4c39b9d9fb3d029f 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -229,10 +229,12 @@ tests := \
tst-freopen4 \
tst-freopen5 \
tst-freopen6 \
+ tst-freopen7 \
tst-freopen64-2 \
tst-freopen64-3 \
tst-freopen64-4 \
tst-freopen64-6 \
+ tst-freopen64-7 \
tst-fseek \
tst-fseek-mmap \
tst-fwrite \
@@ -636,3 +638,6 @@ $(objpfx)tst-setvbuf2.out: $(objpfx)tst-setvbuf2-ind
$(objpfx)tst-printf-round: $(libm)
$(objpfx)tst-scanf-round: $(libm)
+
+$(objpfx)tst-freopen7: $(shared-thread-library)
+$(objpfx)tst-freopen64-7: $(shared-thread-library)
diff --git a/stdio-common/tst-freopen64-7.c b/stdio-common/tst-freopen64-7.c
new file mode 100644
index 0000000000000000..f34c2805210079b9
--- /dev/null
+++ b/stdio-common/tst-freopen64-7.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen64
+#include <tst-freopen7-main.c>
diff --git a/stdio-common/tst-freopen7-main.c b/stdio-common/tst-freopen7-main.c
new file mode 100644
index 0000000000000000..965e0b4adce750cc
--- /dev/null
+++ b/stdio-common/tst-freopen7-main.c
@@ -0,0 +1,155 @@
+/* Test freopen cancellation handling.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <mcheck.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#include <support/check.h>
+#include <support/file_contents.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xstdio.h>
+#include <support/xthread.h>
+#include <support/xunistd.h>
+
+char *file1, *file2, *file3, *fifo;
+
+sem_t sem;
+
+void *
+test_rc_to_r (void *p)
+{
+ int ret;
+ FILE *fp, *fp2;
+ ret = sem_post (&sem); /* do_test cancels this thread after this. */
+ TEST_VERIFY_EXIT (ret == 0);
+ fp = xfopen (file1, "rc"); /* "c": non-cancelling I/O on fp. */
+ for (int i = 0; i < 1000000; i++) /* Must not be cancelled in here. */
+ {
+ fgetc (fp);
+ fseek (fp, 0, SEEK_SET);
+ }
+ fp2 = xfopen (file3, "wc");
+ fputs ("rc_to_r got to freopen", fp2); /* Marker checked by do_test. */
+ xfclose (fp2);
+ /* Cancellation should occur at some point from here onwards
+ (possibly leaking memory and file descriptors associated with the
+ FILE). */
+ fp = FREOPEN (file2, "r", fp);
+ TEST_VERIFY_EXIT (fp != NULL);
+ for (;;) /* Loops until the pending cancellation acts. */
+ {
+ fgetc (fp);
+ fseek (fp, 0, SEEK_SET);
+ }
+}
+
+void *
+test_r_to_rc (void *p)
+{
+ int ret;
+ FILE *fp;
+ fp = xfopen (file1, "r");
+ fp = FREOPEN (fifo, "rc", fp); /* do_test opens the write end. */
+ TEST_VERIFY_EXIT (fp != NULL);
+ ret = sem_post (&sem); /* do_test cancels this thread after this. */
+ TEST_VERIFY_EXIT (ret == 0);
+ /* No cancellation should occur for I/O on fifo. */
+ ret = fgetc (fp);
+ /* At this point, the other thread has called pthread_cancel and
+ then written a byte to the fifo, so this thread is cancelled at
+ the next cancellation point. */
+ TEST_VERIFY (ret == 'x');
+ xfclose (fp);
+ fp = xfopen (file3, "wc");
+ fputs ("r_to_rc got to fclose", fp); /* Marker checked by do_test. */
+ xfclose (fp);
+ pthread_testcancel (); /* The pending cancel is expected to act here. */
+ FAIL_EXIT1 ("test_r_to_rc not cancelled\n");
+}
+
+int
+do_test (void)
+{
+ char *temp_dir = support_create_temp_directory ("tst-freopen-cancel");
+ file1 = xasprintf ("%s/file1", temp_dir);
+ support_write_file_string (file1, "file1");
+ add_temp_file (file1);
+ file2 = xasprintf ("%s/file2", temp_dir);
+ support_write_file_string (file2, "file2");
+ add_temp_file (file2);
+ file3 = xasprintf ("%s/file3", temp_dir); /* Progress marker file. */
+ support_write_file_string (file3, "file3");
+ add_temp_file (file3);
+ fifo = xasprintf ("%s/fifo", temp_dir);
+ xmkfifo (fifo, 0666);
+ add_temp_file (fifo);
+ int ret;
+ pthread_t thr;
+ void *retval;
+
+ /* Test changing to/from c (cancellation disabled). */
+
+ verbose_printf ("Testing rc -> r\n");
+ ret = sem_init (&sem, 0, 0);
+ TEST_VERIFY_EXIT (ret == 0);
+ thr = xpthread_create (NULL, test_rc_to_r, NULL);
+ ret = sem_wait (&sem); /* Wait until the thread has started. */
+ TEST_VERIFY_EXIT (ret == 0);
+ xpthread_cancel (thr);
+ ret = pthread_join (thr, &retval);
+ TEST_COMPARE (ret, 0);
+ TEST_VERIFY (retval == PTHREAD_CANCELED);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file3, "rc_to_r got to freopen");
+
+ verbose_printf ("Testing r -> rc\n");
+ ret = sem_init (&sem, 0, 0); /* NOTE(review): re-init, no sem_destroy. */
+ TEST_VERIFY_EXIT (ret == 0);
+ thr = xpthread_create (NULL, test_r_to_rc, NULL);
+ FILE *fp = xfopen (fifo, "w"); /* Write end; the thread opens for read. */
+ ret = sem_wait (&sem);
+ TEST_VERIFY_EXIT (ret == 0);
+ /* This call happens while, or before, the other thread is waiting
+ to read a character from the fifo. It thus verifies that
+ cancellation does not occur from the fgetc call in that thread
+ (it should instead occur only in the pthread_testcancel call),
+ because the expected string is only written to file3 after that
+ thread closes the fifo. */
+ xpthread_cancel (thr);
+ fputc ('x', fp);
+ xfclose (fp);
+ ret = pthread_join (thr, &retval);
+ TEST_COMPARE (ret, 0);
+ TEST_VERIFY (retval == PTHREAD_CANCELED);
+ TEST_OPEN_AND_COMPARE_FILE_STRING (file3, "r_to_rc got to fclose");
+
+ free (temp_dir);
+ free (file1);
+ free (file2);
+ free (file3); /* NOTE(review): fifo is never freed. */
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/stdio-common/tst-freopen7.c b/stdio-common/tst-freopen7.c
new file mode 100644
index 0000000000000000..03d0de798e3d2616
--- /dev/null
+++ b/stdio-common/tst-freopen7.c
@@ -0,0 +1,2 @@
+#define FREOPEN freopen
+#include <tst-freopen7-main.c>

160
glibc-RHEL-115825.patch Normal file
View File

@ -0,0 +1,160 @@
commit b371ed272695919a332d30bd2754a82e5e683178
Author: Joseph Myers <josmyers@redhat.com>
Date: Mon Oct 21 20:56:48 2024 +0000
Check time arguments to pthread_timedjoin_np and pthread_clockjoin_np
The pthread_timedjoin_np and pthread_clockjoin_np functions do not
check that a valid time has been specified. The documentation for
these functions in the glibc manual isn't sufficiently detailed to say
if they should, but consistency with POSIX functions such as
pthread_mutex_timedlock and pthread_cond_timedwait strongly indicates
that an EINVAL error is appropriate (even if there might be some
ambiguity about exactly where such a check should go in relation to
other checks for whether the thread exists, whether it's immediately
joinable, etc.). Copy the logic for such a check used in
pthread_rwlock_common.c.
pthread_join_common had some logic calling valid_nanoseconds before
commit 9e92278ffad441daf588ff1ff5bd8094aa33fbfd, "nptl: Remove
clockwait_tid"; I haven't checked exactly what cases that detected.
Tested for x86_64 and x86.
diff --git a/nptl/pthread_join_common.c b/nptl/pthread_join_common.c
index 9c685c79cfd19415..273db80543103772 100644
--- a/nptl/pthread_join_common.c
+++ b/nptl/pthread_join_common.c
@@ -49,6 +49,12 @@ __pthread_clockjoin_ex (pthread_t threadid, void **thread_return,
/* We cannot wait for the thread. */
return EINVAL;
+ /* Make sure the clock and time specified are valid. */
+ if (abstime
+ && __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
+ || ! valid_nanoseconds (abstime->tv_nsec)))
+ return EINVAL;
+
struct pthread *self = THREAD_SELF;
int result = 0;
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index 82de1ae81b244dec..0d9e232acec2ed39 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -175,6 +175,7 @@ tests += \
tst-join13 \
tst-join14 \
tst-join15 \
+ tst-join16 \
tst-key1 \
tst-key2 \
tst-key3 \
@@ -293,6 +294,7 @@ tests-time64 += \
tst-cnd-timedwait-time64 \
tst-cond11-time64 \
tst-join14-time64 \
+ tst-join16-time64 \
tst-mtx-timedlock-time64 \
tst-rwlock14-time64 \
tst-sem5-time64 \
diff --git a/sysdeps/pthread/tst-join16-time64.c b/sysdeps/pthread/tst-join16-time64.c
new file mode 100644
index 0000000000000000..730cc5656308c30c
--- /dev/null
+++ b/sysdeps/pthread/tst-join16-time64.c
@@ -0,0 +1 @@
+#include "tst-join16.c"
diff --git a/sysdeps/pthread/tst-join16.c b/sysdeps/pthread/tst-join16.c
new file mode 100644
index 0000000000000000..8bf37b5e42fc46f6
--- /dev/null
+++ b/sysdeps/pthread/tst-join16.c
@@ -0,0 +1,87 @@
+/* Test pthread_timedjoin_np and pthread_clockjoin_np with an invalid timeout.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <time.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xtime.h>
+
+
+#define CLOCK_USE_TIMEDJOIN (-1)
+
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void *
+tf (void *arg)
+{
+ xpthread_mutex_lock (&lock); /* Blocks while do_test_clock holds lock. */
+ xpthread_mutex_unlock (&lock);
+ return (void *) 42l; /* Value checked after pthread_join. */
+}
+
+static int
+do_test_clock (clockid_t clockid)
+{
+ const clockid_t clockid_for_get =
+ (clockid == CLOCK_USE_TIMEDJOIN) ? CLOCK_REALTIME : clockid;
+
+ xpthread_mutex_lock (&lock); /* Keeps tf from finishing. */
+ pthread_t th = xpthread_create (NULL, tf, NULL);
+
+ void *status;
+ int ret;
+ struct timespec timeout = xclock_now (clockid_for_get);
+ timeout.tv_sec += 2;
+ timeout.tv_nsec = -1; /* Invalid: negative. */
+ if (clockid == CLOCK_USE_TIMEDJOIN)
+ ret = pthread_timedjoin_np (th, &status, &timeout);
+ else
+ ret = pthread_clockjoin_np (th, &status, clockid, &timeout);
+ TEST_COMPARE (ret, EINVAL);
+ timeout.tv_nsec = 1000000000; /* Invalid: a whole second. */
+ if (clockid == CLOCK_USE_TIMEDJOIN)
+ ret = pthread_timedjoin_np (th, &status, &timeout);
+ else
+ ret = pthread_clockjoin_np (th, &status, clockid, &timeout);
+ TEST_COMPARE (ret, EINVAL);
+ xpthread_mutex_unlock (&lock); /* Let tf run to completion. */
+ timeout.tv_nsec = 0; /* NOTE(review): timeout is not used after this. */
+ ret = pthread_join (th, &status);
+ TEST_COMPARE (ret, 0);
+ if (status != (void *) 42l)
+ FAIL_EXIT1 ("return value %p, expected %p\n", status, (void *) 42l);
+
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ puts ("testing pthread_timedjoin_np");
+ do_test_clock (CLOCK_USE_TIMEDJOIN); /* Selects pthread_timedjoin_np. */
+ puts ("testing CLOCK_REALTIME");
+ do_test_clock (CLOCK_REALTIME); /* Return value is always 0. */
+ puts ("testing CLOCK_MONOTONIC");
+ do_test_clock (CLOCK_MONOTONIC);
+ return 0;
+}
+
+#include <support/test-driver.c>

85
glibc-RHEL-117418.patch Normal file
View File

@ -0,0 +1,85 @@
commit afce5fccdf680113cdb6fc44d1b4ca7daea42c25
Author: Florian Weimer <fweimer@redhat.com>
Date: Thu Sep 25 08:37:13 2025 +0200
manual: Improve documentation of the shutdown function
Document the SHUT_* constants and attempt to explain the
implications for Linux TCP and UNIX domain sockets.
The Linux TCP behavior was discovered when writing the
socket/tst-shutdown test.
Suggested by Sergey Organov in
<https://inbox.sourceware.org/libc-help/qblfrh$4m4i$1@blaine.gmane.org/>.
Reviewed-by: Collin Funk <collin.funk1@gmail.com>
diff --git a/manual/socket.texi b/manual/socket.texi
index d804c7a48b..56948073d5 100644
--- a/manual/socket.texi
+++ b/manual/socket.texi
@@ -2317,22 +2317,23 @@ The @code{shutdown} function shuts down the connection of socket
@var{socket}. The argument @var{how} specifies what action to
perform:
-@table @code
-@item 0
-Stop receiving data for this socket. If further data arrives,
-reject it.
+@vtable @code
+@item SHUT_RD
+Stop receiving data on the socket.
-@item 1
-Stop trying to transmit data from this socket. Discard any data
-waiting to be sent. Stop looking for acknowledgement of data already
-sent; don't retransmit it if it is lost.
+@item SHUT_WR
+Indicate to the peer that no further data will be transmitted on the
+socket. This indication is ordered with regard to past send
+operations on the socket, and data pending at the time of the call is
+still delivered.
-@item 2
-Stop both reception and transmission.
-@end table
+@item SHUT_RDWR
+Combine the actions of @code{SHUT_RD} and @code{SHUT_WR}.
+@end vtable
The return value is @code{0} on success and @code{-1} on failure. The
-following @code{errno} error conditions are defined for this function:
+following generic @code{errno} error conditions are defined for this
+function:
@table @code
@item EBADF
@@ -2346,6 +2347,27 @@ following @code{errno} error conditions are defined for this function:
@end table
@end deftypefun
+Additional errors can be reported for specific socket types.
+
+The exact impact of the @code{shutdown} function depends on the socket
+protocol and its implementation. In portable code, the @code{shutdown}
+function cannot be used on its own to gracefully terminate a connection
+which is operated in full-duplex mode (with both peers sending data).
+
+On Linux, when @code{SHUT_RD} is used to shut down a TCP socket, any
+pending data in the incoming socket buffer and any data that arrives
+subsequently is discarded, without reporting an error or generating a
+TCP RST segment. Attempts to read data from this socket using
+@code{recv} and similar functions (@pxref{Receiving Data}) return zero.
+(Other systems may treat @code{SHUT_RD} with pending data as a data loss
+event and generate RST segments. Linux @code{AF_LOCAL}/@code{AF_UNIX}
+sockets also report errors to peers.)
+
+Similarly, when @code{SHUT_WR} is used on a Linux TCP socket, a FIN
+segment is sent to the peer, ordered after any data written previously
+to the socket. After encountering the FIN segment, the peer will
+recognize this as an end-of-stream condition.
+
@node Socket Pairs
@subsection Socket Pairs
@cindex creating a socket pair

4742
glibc-RHEL-118273-1.patch Normal file

File diff suppressed because it is too large Load Diff

514
glibc-RHEL-118273-10.patch Normal file
View File

@ -0,0 +1,514 @@
commit 157f89fa3d616729c8d7797168a9b3eaaa6ebf6e
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Apr 30 13:49:58 2024 +0100
aarch64/fpu: Add vector variants of hypot
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index e8af35099d7b9f8f..06657782a1ee7106 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -13,6 +13,7 @@ libmvec-supported-funcs = acos \
exp10 \
exp2 \
expm1 \
+ hypot \
log \
log10 \
log1p \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 3cb1b82bd2785a4b..aedae9457b148983 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -109,6 +109,11 @@ libmvec {
_ZGVnN4v_erfcf;
_ZGVsMxv_erfc;
_ZGVsMxv_erfcf;
+ _ZGVnN4vv_hypotf;
+ _ZGVnN2vv_hypotf;
+ _ZGVnN2vv_hypot;
+ _ZGVsMxvv_hypotf;
+ _ZGVsMxvv_hypot;
_ZGVnN2v_sinh;
_ZGVnN2v_sinhf;
_ZGVnN4v_sinhf;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 383c4369729a3452..a8889a92fd041585 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -31,6 +31,7 @@ libmvec_hidden_proto (V_NAME_F1(exp10));
libmvec_hidden_proto (V_NAME_F1(exp2));
libmvec_hidden_proto (V_NAME_F1(exp));
libmvec_hidden_proto (V_NAME_F1(expm1));
+libmvec_hidden_proto (V_NAME_F2(hypot));
libmvec_hidden_proto (V_NAME_F1(log10));
libmvec_hidden_proto (V_NAME_F1(log1p));
libmvec_hidden_proto (V_NAME_F1(log2));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index e29b2d1c09273969..ca3017733959702f 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -89,6 +89,10 @@
# define __DECL_SIMD_expm1 __DECL_SIMD_aarch64
# undef __DECL_SIMD_expm1f
# define __DECL_SIMD_expm1f __DECL_SIMD_aarch64
+# undef __DECL_SIMD_hypot
+# define __DECL_SIMD_hypot __DECL_SIMD_aarch64
+# undef __DECL_SIMD_hypotf
+# define __DECL_SIMD_hypotf __DECL_SIMD_aarch64
# undef __DECL_SIMD_log
# define __DECL_SIMD_log __DECL_SIMD_aarch64
# undef __DECL_SIMD_logf
@@ -162,6 +166,7 @@ __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expm1f (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4vv_hypotf (__f32x4_t, __f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
@@ -186,6 +191,7 @@ __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp2 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_hypot (__f64x2_t, __f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
@@ -215,6 +221,7 @@ __sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_exp10f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_exp2f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_expm1f (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxvv_hypotf (__sv_f32_t, __sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_logf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t);
@@ -239,6 +246,7 @@ __sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_exp10 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_exp2 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_expm1 (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxvv_hypot (__sv_f64_t, __sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/hypot_advsimd.c b/sysdeps/aarch64/fpu/hypot_advsimd.c
new file mode 100644
index 0000000000000000..e4e279fa0c362336
--- /dev/null
+++ b/sysdeps/aarch64/fpu/hypot_advsimd.c
@@ -0,0 +1,97 @@
+/* Double-precision vector (Advanced SIMD) hypot function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+
+#if WANT_SIMD_EXCEPT
+static const struct data
+{
+ uint64x2_t tiny_bound, thres;
+} data = {
+ .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */
+ .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */
+};
+#else
+static const struct data
+{
+ uint64x2_t tiny_bound;
+ uint32x4_t thres;
+} data = {
+ .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */
+ .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */
+};
+#endif
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum,
+ uint32x2_t special)
+{
+ return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special));
+}
+
+/* Vector implementation of double-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222)
+ got 0x1.6a1b19400964ep-204
+ want 0x1.6a1b19400964dp-204. */
+#if WANT_SIMD_EXCEPT
+
+float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ float64x2_t ay = vabsq_f64 (y);
+
+ uint64x2_t ix = vreinterpretq_u64_f64 (ax);
+ uint64x2_t iy = vreinterpretq_u64_f64 (ay);
+
+ /* Extreme values, NaNs, and infinities should be handled by the scalar
+ fallback for correct flag handling. */
+ uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres);
+ uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres);
+ ax = v_zerofy_f64 (ax, specialx);
+ ay = v_zerofy_f64 (ay, specialy);
+ uint32x2_t special = vaddhn_u64 (specialx, specialy);
+
+ float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay);
+
+ if (__glibc_unlikely (v_any_u32h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f64 (sqsum);
+}
+#else
+
+float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y);
+
+ uint32x2_t special = vcge_u32 (
+ vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound),
+ vget_low_u32 (d->thres));
+
+ if (__glibc_unlikely (v_any_u32h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f64 (sqsum);
+}
+#endif
diff --git a/sysdeps/aarch64/fpu/hypot_sve.c b/sysdeps/aarch64/fpu/hypot_sve.c
new file mode 100644
index 0000000000000000..74417040acb2f32f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/hypot_sve.c
@@ -0,0 +1,54 @@
+/* Double-precision vector (SVE) hypot function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+
+static const struct data
+{
+ uint64_t tiny_bound, thres;
+} data = {
+ .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-823). */
+ .thres = 0x7300000000000000, /* asuint (0x1p1017) - tiny_bound. */
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
+ svbool_t special)
+{
+ return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special);
+}
+
+/* SVE implementation of double-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330)
+ got 0x1.6a22d0412cfp+352
+ want 0x1.6a22d0412cf01p+352. */
+svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
+
+ svbool_t special = svcmpge (
+ pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (sqsum, x, y, pg, special);
+ return svsqrt_x (pg, sqsum);
+}
diff --git a/sysdeps/aarch64/fpu/hypotf_advsimd.c b/sysdeps/aarch64/fpu/hypotf_advsimd.c
new file mode 100644
index 0000000000000000..34818b021abce1b7
--- /dev/null
+++ b/sysdeps/aarch64/fpu/hypotf_advsimd.c
@@ -0,0 +1,98 @@
+/* Single-precision vector (Advanced SIMD) hypot function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+
+#if WANT_SIMD_EXCEPT
+static const struct data
+{
+ uint32x4_t tiny_bound, thres;
+} data = {
+ .tiny_bound = V4 (0x20000000), /* asuint (0x1p-63). */
+ .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound. */
+};
+#else
+static const struct data
+{
+ uint32x4_t tiny_bound;
+ uint16x8_t thres;
+} data = {
+ .tiny_bound = V4 (0x0C800000), /* asuint (0x1p-102). */
+ .thres = V8 (0x7300), /* asuint (inf) - tiny_bound. */
+};
+#endif
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum,
+ uint16x4_t special)
+{
+ return v_call2_f32 (hypotf, x, y, vsqrtq_f32 (sqsum), vmovl_u16 (special));
+}
+
+/* Vector implementation of single-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVnN4vv_hypotf (0x1.6a419cp-13, 0x1.82a852p-22) got 0x1.6a41d2p-13
+ want 0x1.6a41dp-13. */
+#if WANT_SIMD_EXCEPT
+
+float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t ax = vabsq_f32 (x);
+ float32x4_t ay = vabsq_f32 (y);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (ax);
+ uint32x4_t iy = vreinterpretq_u32_f32 (ay);
+
+ /* Extreme values, NaNs, and infinities should be handled by the scalar
+ fallback for correct flag handling. */
+ uint32x4_t specialx = vcgeq_u32 (vsubq_u32 (ix, d->tiny_bound), d->thres);
+ uint32x4_t specialy = vcgeq_u32 (vsubq_u32 (iy, d->tiny_bound), d->thres);
+ ax = v_zerofy_f32 (ax, specialx);
+ ay = v_zerofy_f32 (ay, specialy);
+ uint16x4_t special = vaddhn_u32 (specialx, specialy);
+
+ float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (ax, ax), ay, ay);
+
+ if (__glibc_unlikely (v_any_u16h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f32 (sqsum);
+}
+#else
+
+float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y);
+
+ uint16x4_t special = vcge_u16 (
+ vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound),
+ vget_low_u16 (d->thres));
+
+ if (__glibc_unlikely (v_any_u16h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f32 (sqsum);
+}
+#endif
+libmvec_hidden_def (V_NAME_F2 (hypot))
+HALF_WIDTH_ALIAS_F2(hypot)
diff --git a/sysdeps/aarch64/fpu/hypotf_sve.c b/sysdeps/aarch64/fpu/hypotf_sve.c
new file mode 100644
index 0000000000000000..3a403de66eb091f4
--- /dev/null
+++ b/sysdeps/aarch64/fpu/hypotf_sve.c
@@ -0,0 +1,48 @@
+/* Single-precision vector (SVE) hypot function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+
+#define TinyBound 0x0c800000 /* asuint (0x1p-102). */
+#define Thres 0x73000000 /* asuint (inf) - TinyBound. */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
+ svbool_t special)
+{
+ return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special);
+}
+
+/* SVE implementation of single-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19
+ want 0x1.6a2344p-19. */
+svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
+ const svbool_t pg)
+{
+ svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
+
+ svbool_t special = svcmpge (
+ pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (sqsum, x, y, pg, special);
+
+ return svsqrt_x (pg, sqsum);
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index f2d8714075ab99b8..417125be476cd75f 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -38,6 +38,7 @@ VPCS_VECTOR_WRAPPER (exp_advsimd, _ZGVnN2v_exp)
VPCS_VECTOR_WRAPPER (exp10_advsimd, _ZGVnN2v_exp10)
VPCS_VECTOR_WRAPPER (exp2_advsimd, _ZGVnN2v_exp2)
VPCS_VECTOR_WRAPPER (expm1_advsimd, _ZGVnN2v_expm1)
+VPCS_VECTOR_WRAPPER_ff (hypot_advsimd, _ZGVnN2vv_hypot)
VPCS_VECTOR_WRAPPER (log_advsimd, _ZGVnN2v_log)
VPCS_VECTOR_WRAPPER (log10_advsimd, _ZGVnN2v_log10)
VPCS_VECTOR_WRAPPER (log1p_advsimd, _ZGVnN2v_log1p)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 37873d5e432ae9e8..31ebf18705f68856 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -57,6 +57,7 @@ SVE_VECTOR_WRAPPER (exp_sve, _ZGVsMxv_exp)
SVE_VECTOR_WRAPPER (exp10_sve, _ZGVsMxv_exp10)
SVE_VECTOR_WRAPPER (exp2_sve, _ZGVsMxv_exp2)
SVE_VECTOR_WRAPPER (expm1_sve, _ZGVsMxv_expm1)
+SVE_VECTOR_WRAPPER_ff (hypot_sve, _ZGVsMxvv_hypot)
SVE_VECTOR_WRAPPER (log_sve, _ZGVsMxv_log)
SVE_VECTOR_WRAPPER (log10_sve, _ZGVsMxv_log10)
SVE_VECTOR_WRAPPER (log1p_sve, _ZGVsMxv_log1p)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 08e33115b9dc6f5e..dab0f1cfcb79a305 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -38,6 +38,7 @@ VPCS_VECTOR_WRAPPER (expf_advsimd, _ZGVnN4v_expf)
VPCS_VECTOR_WRAPPER (exp10f_advsimd, _ZGVnN4v_exp10f)
VPCS_VECTOR_WRAPPER (exp2f_advsimd, _ZGVnN4v_exp2f)
VPCS_VECTOR_WRAPPER (expm1f_advsimd, _ZGVnN4v_expm1f)
+VPCS_VECTOR_WRAPPER_ff (hypotf_advsimd, _ZGVnN4vv_hypotf)
VPCS_VECTOR_WRAPPER (logf_advsimd, _ZGVnN4v_logf)
VPCS_VECTOR_WRAPPER (log10f_advsimd, _ZGVnN4v_log10f)
VPCS_VECTOR_WRAPPER (log1pf_advsimd, _ZGVnN4v_log1pf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 025daa662efd6f7f..2aa6cbcc28d69cf8 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -57,6 +57,7 @@ SVE_VECTOR_WRAPPER (expf_sve, _ZGVsMxv_expf)
SVE_VECTOR_WRAPPER (exp10f_sve, _ZGVsMxv_exp10f)
SVE_VECTOR_WRAPPER (exp2f_sve, _ZGVsMxv_exp2f)
SVE_VECTOR_WRAPPER (expm1f_sve, _ZGVsMxv_expm1f)
+SVE_VECTOR_WRAPPER_ff (hypotf_sve, _ZGVsMxvv_hypotf)
SVE_VECTOR_WRAPPER (logf_sve, _ZGVsMxv_logf)
SVE_VECTOR_WRAPPER (log10f_sve, _ZGVsMxv_log10f)
SVE_VECTOR_WRAPPER (log1pf_sve, _ZGVsMxv_log1pf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 055da83d639a2430..17723d0c9e2dfcf5 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1174,10 +1174,18 @@ double: 1
float: 1
ldouble: 1
+Function: "hypot_advsimd":
+double: 1
+float: 1
+
Function: "hypot_downward":
double: 1
ldouble: 1
+Function: "hypot_sve":
+double: 1
+float: 1
+
Function: "hypot_towardzero":
double: 1
ldouble: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index 26c3fbf18b2f12a9..1184374efd25cfa6 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -89,6 +89,8 @@ GLIBC_2.40 _ZGVnN2v_sinh F
GLIBC_2.40 _ZGVnN2v_sinhf F
GLIBC_2.40 _ZGVnN2v_tanh F
GLIBC_2.40 _ZGVnN2v_tanhf F
+GLIBC_2.40 _ZGVnN2vv_hypot F
+GLIBC_2.40 _ZGVnN2vv_hypotf F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_atanhf F
@@ -97,6 +99,7 @@ GLIBC_2.40 _ZGVnN4v_erfcf F
GLIBC_2.40 _ZGVnN4v_erff F
GLIBC_2.40 _ZGVnN4v_sinhf F
GLIBC_2.40 _ZGVnN4v_tanhf F
+GLIBC_2.40 _ZGVnN4vv_hypotf F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_asinh F
@@ -113,3 +116,5 @@ GLIBC_2.40 _ZGVsMxv_sinh F
GLIBC_2.40 _ZGVsMxv_sinhf F
GLIBC_2.40 _ZGVsMxv_tanh F
GLIBC_2.40 _ZGVsMxv_tanhf F
+GLIBC_2.40 _ZGVsMxvv_hypot F
+GLIBC_2.40 _ZGVsMxvv_hypotf F

715
glibc-RHEL-118273-11.patch Normal file
View File

@ -0,0 +1,715 @@
commit 75207bde6870eb4b258e16fbb41252b2e6377675
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Apr 30 13:49:59 2024 +0100
aarch64/fpu: Add vector variants of cbrt
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 06657782a1ee7106..990d1135b93485c5 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -5,6 +5,7 @@ libmvec-supported-funcs = acos \
atan \
atanh \
atan2 \
+ cbrt \
cos \
cosh \
erf \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index aedae9457b148983..36a9e4df1e058c46 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -94,6 +94,11 @@ libmvec {
_ZGVnN4v_atanhf;
_ZGVsMxv_atanh;
_ZGVsMxv_atanhf;
+ _ZGVnN2v_cbrt;
+ _ZGVnN2v_cbrtf;
+ _ZGVnN4v_cbrtf;
+ _ZGVsMxv_cbrt;
+ _ZGVsMxv_cbrtf;
_ZGVnN2v_cosh;
_ZGVnN2v_coshf;
_ZGVnN4v_coshf;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index a8889a92fd041585..54858efd8aa0ff82 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -23,6 +23,7 @@ libmvec_hidden_proto (V_NAME_F1(asin));
libmvec_hidden_proto (V_NAME_F1(asinh));
libmvec_hidden_proto (V_NAME_F1(atan));
libmvec_hidden_proto (V_NAME_F1(atanh));
+libmvec_hidden_proto (V_NAME_F1(cbrt));
libmvec_hidden_proto (V_NAME_F1(cos));
libmvec_hidden_proto (V_NAME_F1(cosh));
libmvec_hidden_proto (V_NAME_F1(erf));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index ca3017733959702f..b1c024fe13a7dc32 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -57,6 +57,10 @@
# define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan2f
# define __DECL_SIMD_atan2f __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cbrt
+# define __DECL_SIMD_cbrt __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cbrtf
+# define __DECL_SIMD_cbrtf __DECL_SIMD_aarch64
# undef __DECL_SIMD_cos
# define __DECL_SIMD_cos __DECL_SIMD_aarch64
# undef __DECL_SIMD_cosf
@@ -158,6 +162,7 @@ __vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
@@ -183,6 +188,7 @@ __vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
@@ -213,6 +219,7 @@ __sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
@@ -238,6 +245,7 @@ __sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cbrt_advsimd.c b/sysdeps/aarch64/fpu/cbrt_advsimd.c
new file mode 100644
index 0000000000000000..adfbb60cd3918c95
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cbrt_advsimd.c
@@ -0,0 +1,121 @@
+/* Double-precision vector (AdvSIMD) cbrt function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+const static struct data
+{
+ float64x2_t poly[4], one_third, shift;
+ int64x2_t exp_bias;
+ uint64x2_t abs_mask, tiny_bound;
+ uint32x4_t thresh;
+ double table[5];
+} data = {
+ .shift = V2 (0x1.8p52),
+ .poly = { /* Generated with fpminimax in [0.5, 1]. */
+ V2 (0x1.c14e8ee44767p-2), V2 (0x1.dd2d3f99e4c0ep-1),
+ V2 (-0x1.08e83026b7e74p-1), V2 (0x1.2c74eaa3ba428p-3) },
+ .exp_bias = V2 (1022),
+ .abs_mask = V2(0x7fffffffffffffff),
+ .tiny_bound = V2(0x0010000000000000), /* Smallest normal. */
+ .thresh = V4(0x7fe00000), /* asuint64 (infinity) - tiny_bound. */
+ .one_third = V2(0x1.5555555555555p-2),
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
+ 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 }
+};
+
+#define MantissaMask v_u64 (0x000fffffffffffff)
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint32x2_t special)
+{
+ return v_call_f64 (cbrt, x, y, vmovl_u32 (special));
+}
+
+/* Approximation for double-precision vector cbrt(x), using low-order polynomial
+ and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
+ according to the exponent, for instance an error observed for double value
+ m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
+ integer.
+ __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+ want 0x1.965fe72821e99p+0. */
+VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+ /* Subnormal, +/-0 and special values. */
+ uint32x2_t special
+ = vcge_u32 (vsubhn_u64 (iax, d->tiny_bound), vget_low_u32 (d->thresh));
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexp, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5));
+ int64x2_t exp_bias = d->exp_bias;
+ uint64x2_t ia12 = vshrq_n_u64 (iax, 52);
+ int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias);
+
+ /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for
+ Newton iterations. */
+ float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly);
+ float64x2_t one_third = d->one_third;
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
+ float64x2_t m_by_3 = vmulq_f64 (m, one_third);
+ float64x2_t two_thirds = vaddq_f64 (one_third, one_third);
+ float64x2_t a
+ = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (p, p)), two_thirds, p);
+ a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (a, a)), two_thirds, a);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is
+ an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+
+ float64x2_t ef = vcvtq_f64_s64 (e);
+ float64x2_t eb3f = vrndnq_f64 (vmulq_f64 (ef, one_third));
+ int64x2_t em3 = vcvtq_s64_f64 (vfmsq_f64 (ef, eb3f, v_f64 (3)));
+ int64x2_t ey = vcvtq_s64_f64 (eb3f);
+
+ float64x2_t my = (float64x2_t){ d->table[em3[0] + 2], d->table[em3[1] + 2] };
+ my = vmulq_f64 (my, a);
+
+ /* Vector version of ldexp. */
+ float64x2_t y = vreinterpretq_f64_s64 (
+ vshlq_n_s64 (vaddq_s64 (ey, vaddq_s64 (exp_bias, v_s64 (1))), 52));
+ y = vmulq_f64 (y, my);
+
+ if (__glibc_unlikely (v_any_u32h (special)))
+ return special_case (x, vbslq_f64 (d->abs_mask, y, x), special);
+
+ /* Copy sign. */
+ return vbslq_f64 (d->abs_mask, y, x);
+}
diff --git a/sysdeps/aarch64/fpu/cbrt_sve.c b/sysdeps/aarch64/fpu/cbrt_sve.c
new file mode 100644
index 0000000000000000..fc976eda2a6018f7
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cbrt_sve.c
@@ -0,0 +1,128 @@
+/* Double-precision vector (SVE) cbrt function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+const static struct data
+{
+ float64_t poly[4];
+ float64_t table[5];
+ float64_t one_third, two_thirds, shift;
+ int64_t exp_bias;
+ uint64_t tiny_bound, thresh;
+} data = {
+ /* Generated with FPMinimax in [0.5, 1]. */
+ .poly = { 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1,
+ 0x1.2c74eaa3ba428p-3, },
+ /* table[i] = 2^((i - 2) / 3). */
+ .table = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
+ 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0, },
+ .one_third = 0x1.5555555555555p-2,
+ .two_thirds = 0x1.5555555555555p-1,
+ .shift = 0x1.8p52,
+ .exp_bias = 1022,
+ .tiny_bound = 0x0010000000000000, /* Smallest normal. */
+ .thresh = 0x7fe0000000000000, /* asuint64 (infinity) - tiny_bound. */
+};
+
+#define MantissaMask 0x000fffffffffffff
+#define HalfExp 0x3fe0000000000000
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (cbrt, x, y, special);
+}
+
+static inline svfloat64_t
+shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
+{
+ return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
+}
+
+/* Approximation for double-precision vector cbrt(x), using low-order
+ polynomial and two Newton iterations. Greatest observed error is 1.79 ULP.
+ Errors repeat according to the exponent, for instance an error observed for
+ double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i
+ is an integer.
+ _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342
+ want 0x1.965f53b0e5d95p-342. */
+svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t iax = svreinterpret_u64 (ax);
+ svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
+
+ /* Subnormal, +/-0 and special values. */
+ svbool_t special = svcmpge (pg, svsub_x (pg, iax, d->tiny_bound), d->thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexp, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ svfloat64_t m = svreinterpret_f64 (svorr_x (
+ pg, svand_x (pg, svreinterpret_u64 (x), MantissaMask), HalfExp));
+ svint64_t e
+ = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, iax, 52)), d->exp_bias);
+
+ /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
+ for Newton iterations. */
+ svfloat64_t p
+ = sv_pairwise_poly_3_f64_x (pg, m, svmul_x (pg, m, m), d->poly);
+
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
+ svfloat64_t m_by_3 = svmul_x (pg, m, d->one_third);
+ svfloat64_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p,
+ d->two_thirds);
+ a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, a, a)), a, d->two_thirds);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+ svfloat64_t eb3f = svmul_x (pg, svcvt_f64_x (pg, e), d->one_third);
+ svint64_t ey = svcvt_s64_x (pg, eb3f);
+ svint64_t em3 = svmls_x (pg, e, ey, 3);
+
+ svfloat64_t my = shifted_lookup (pg, d->table, em3);
+ my = svmul_x (pg, my, a);
+
+ /* Vector version of ldexp. */
+ svfloat64_t y = svscale_x (pg, my, ey);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)),
+ special);
+
+ /* Copy sign. */
+ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/cbrtf_advsimd.c b/sysdeps/aarch64/fpu/cbrtf_advsimd.c
new file mode 100644
index 0000000000000000..27debb8b57c8c3e2
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cbrtf_advsimd.c
@@ -0,0 +1,123 @@
+/* Single-precision vector (AdvSIMD) cbrt function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+const static struct data
+{
+ float32x4_t poly[4], one_third;
+ float table[5];
+} data = {
+ .poly = { /* Very rough approximation of cbrt(x) in [0.5, 1], generated with
+ FPMinimax. */
+ V4 (0x1.c14e96p-2), V4 (0x1.dd2d3p-1), V4 (-0x1.08e81ap-1),
+ V4 (0x1.2c74c2p-3) },
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 },
+ .one_third = V4 (0x1.555556p-2f),
+};
+
+#define SignMask v_u32 (0x80000000)
+#define SmallestNormal v_u32 (0x00800000)
+#define Thresh vdup_n_u16 (0x7f00) /* asuint(INFINITY) - SmallestNormal. */
+#define MantissaMask v_u32 (0x007fffff)
+#define HalfExp v_u32 (0x3f000000)
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint16x4_t special)
+{
+ return v_call_f32 (cbrtf, x, y, vmovl_u16 (special));
+}
+
+static inline float32x4_t
+shifted_lookup (const float *table, int32x4_t i)
+{
+ return (float32x4_t){ table[i[0] + 2], table[i[1] + 2], table[i[2] + 2],
+ table[i[3] + 2] };
+}
+
+/* Approximation for vector single-precision cbrt(x) using Newton iteration
+ with initial guess obtained by a low-order polynomial. Greatest error
+ is 1.64 ULP. This is observed for every value where the mantissa is
+ 0x1.85a2aa and the exponent is a multiple of 3, for example:
+ _ZGVnN4v_cbrtf(0x1.85a2aap+3) got 0x1.267936p+1
+ want 0x1.267932p+1. */
+VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x));
+
+ /* Subnormal, +/-0 and special values. */
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (iax, SmallestNormal), Thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexpf, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ float32x4_t m = vbslq_f32 (MantissaMask, x, v_f32 (0.5));
+ int32x4_t e
+ = vsubq_s32 (vreinterpretq_s32_u32 (vshrq_n_u32 (iax, 23)), v_s32 (126));
+
+ /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is,
+ the less accurate the next stage of the algorithm needs to be. A degree-3
+ polynomial (4 coefficients) is enough for one Newton iteration. */
+ float32x4_t p = v_pairwise_poly_3_f32 (m, vmulq_f32 (m, m), d->poly);
+
+ float32x4_t one_third = d->one_third;
+ float32x4_t two_thirds = vaddq_f32 (one_third, one_third);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ float32x4_t m_by_3 = vmulq_f32 (m, one_third);
+ float32x4_t a
+ = vfmaq_f32 (vdivq_f32 (m_by_3, vmulq_f32 (p, p)), two_thirds, p);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ trunc(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ trunc(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ trunc(e / 3) * sign. */
+ float32x4_t ef = vmulq_f32 (vcvtq_f32_s32 (e), one_third);
+ int32x4_t ey = vcvtq_s32_f32 (ef);
+ int32x4_t em3 = vsubq_s32 (e, vmulq_s32 (ey, v_s32 (3)));
+
+ float32x4_t my = shifted_lookup (d->table, em3);
+ my = vmulq_f32 (my, a);
+
+ /* Vector version of ldexpf. */
+ float32x4_t y
+ = vreinterpretq_f32_s32 (vshlq_n_s32 (vaddq_s32 (ey, v_s32 (127)), 23));
+ y = vmulq_f32 (y, my);
+
+ if (__glibc_unlikely (v_any_u16h (special)))
+ return special_case (x, vbslq_f32 (SignMask, x, y), special);
+
+ /* Copy sign. */
+ return vbslq_f32 (SignMask, x, y);
+}
+libmvec_hidden_def (V_NAME_F1 (cbrt))
+HALF_WIDTH_ALIAS_F1 (cbrt)
diff --git a/sysdeps/aarch64/fpu/cbrtf_sve.c b/sysdeps/aarch64/fpu/cbrtf_sve.c
new file mode 100644
index 0000000000000000..23c220c202244c1f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cbrtf_sve.c
@@ -0,0 +1,122 @@
+/* Single-precision vector (SVE) cbrt function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+
+const static struct data
+{
+ float32_t poly[4];
+ float32_t table[5];
+ float32_t one_third, two_thirds;
+} data = {
+ /* Very rough approximation of cbrt(x) in [0.5, 1], generated with
+ FPMinimax. */
+ .poly = { 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1,
+ 0x1.2c74c2p-3, },
+ /* table[i] = 2^((i - 2) / 3). */
+ .table = { 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 },
+ .one_third = 0x1.555556p-2f,
+ .two_thirds = 0x1.555556p-1f,
+};
+
+#define SmallestNormal 0x00800000
+#define Thresh 0x7f000000 /* asuint(INFINITY) - SmallestNormal. */
+#define MantissaMask 0x007fffff
+#define HalfExp 0x3f000000
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (cbrtf, x, y, special);
+}
+
+static inline svfloat32_t
+shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
+{
+ return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
+}
+
+/* Approximation for vector single-precision cbrt(x) using Newton iteration
+ with initial guess obtained by a low-order polynomial. Greatest error
+ is 1.64 ULP. This is observed for every value where the mantissa is
+ 0x1.85a2aa and the exponent is a multiple of 3, for example:
+ _ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1
+ want 0x1.267932p+1. */
+svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+
+ /* Subnormal, +/-0 and special values. */
+ svbool_t special = svcmpge (pg, svsub_x (pg, iax, SmallestNormal), Thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexpf, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ svfloat32_t m = svreinterpret_f32 (svorr_x (
+ pg, svand_x (pg, svreinterpret_u32 (x), MantissaMask), HalfExp));
+ svint32_t e = svsub_x (pg, svreinterpret_s32 (svlsr_x (pg, iax, 23)), 126);
+
+ /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is,
+ the less accurate the next stage of the algorithm needs to be. A degree-3
+ polynomial (4 coefficients) is enough for one Newton iteration. */
+ svfloat32_t p
+ = sv_pairwise_poly_3_f32_x (pg, m, svmul_x (pg, m, m), d->poly);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ svfloat32_t m_by_3 = svmul_x (pg, m, d->one_third);
+ svfloat32_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p,
+ d->two_thirds);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ trunc(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ trunc(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ trunc(e / 3) * sign. */
+ svfloat32_t ef = svmul_x (pg, svcvt_f32_x (pg, e), d->one_third);
+ svint32_t ey = svcvt_s32_x (pg, ef);
+ svint32_t em3 = svmls_x (pg, e, ey, 3);
+
+ svfloat32_t my = shifted_lookup (pg, d->table, em3);
+ my = svmul_x (pg, my, a);
+
+ /* Vector version of ldexpf. */
+ svfloat32_t y = svscale_x (pg, my, ey);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)),
+ special);
+
+ /* Copy sign. */
+ return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 417125be476cd75f..1877db3ac6932037 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -30,6 +30,7 @@ VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
+VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt)
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
VPCS_VECTOR_WRAPPER (erf_advsimd, _ZGVnN2v_erf)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 31ebf18705f68856..b702f942dea0749f 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -49,6 +49,7 @@ SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
+SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt)
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
SVE_VECTOR_WRAPPER (erf_sve, _ZGVsMxv_erf)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index dab0f1cfcb79a305..9cb451b4f045e625 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -30,6 +30,7 @@ VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
+VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf)
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
VPCS_VECTOR_WRAPPER (erff_advsimd, _ZGVnN4v_erff)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 2aa6cbcc28d69cf8..5b3dd22916d2a50d 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -49,6 +49,7 @@ SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
+SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf)
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
SVE_VECTOR_WRAPPER (erff_sve, _ZGVsMxv_erff)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 17723d0c9e2dfcf5..a67cd7cd7399c533 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -477,11 +477,19 @@ double: 4
float: 1
ldouble: 1
+Function: "cbrt_advsimd":
+double: 1
+float: 1
+
Function: "cbrt_downward":
double: 4
float: 1
ldouble: 1
+Function: "cbrt_sve":
+double: 1
+float: 1
+
Function: "cbrt_towardzero":
double: 3
float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index 1184374efd25cfa6..89ac1dfa36279eb0 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -79,6 +79,8 @@ GLIBC_2.40 _ZGVnN2v_asinh F
GLIBC_2.40 _ZGVnN2v_asinhf F
GLIBC_2.40 _ZGVnN2v_atanh F
GLIBC_2.40 _ZGVnN2v_atanhf F
+GLIBC_2.40 _ZGVnN2v_cbrt F
+GLIBC_2.40 _ZGVnN2v_cbrtf F
GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
@@ -94,6 +96,7 @@ GLIBC_2.40 _ZGVnN2vv_hypotf F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_atanhf F
+GLIBC_2.40 _ZGVnN4v_cbrtf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erfcf F
GLIBC_2.40 _ZGVnN4v_erff F
@@ -106,6 +109,8 @@ GLIBC_2.40 _ZGVsMxv_asinh F
GLIBC_2.40 _ZGVsMxv_asinhf F
GLIBC_2.40 _ZGVsMxv_atanh F
GLIBC_2.40 _ZGVsMxv_atanhf F
+GLIBC_2.40 _ZGVsMxv_cbrt F
+GLIBC_2.40 _ZGVsMxv_cbrtf F
GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F

2511
glibc-RHEL-118273-12.patch Normal file

File diff suppressed because it is too large Load Diff

319
glibc-RHEL-118273-13.patch Normal file
View File

@ -0,0 +1,319 @@
commit 7900ac490db32f6bccff812733f00280dde34e27
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon Sep 23 15:32:53 2024 +0100
AArch64: Improve codegen in users of ADVSIMD expm1f helper
Rearrange operations so MOV is not necessary in reduction or around
the special-case handler. Reduce memory access by using more indexed
MLAs in polynomial.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c
index a0616ec7542cbfce..8303ca296e030c2e 100644
--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c
@@ -18,27 +18,18 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "v_expm1f_inline.h"
static const struct data
{
- float32x4_t poly[5];
- float invln2_and_ln2[4];
- float32x4_t shift;
- int32x4_t exponent_bias;
+ struct v_expm1f_data d;
#if WANT_SIMD_EXCEPT
uint32x4_t thresh;
#else
float32x4_t oflow_bound;
#endif
} data = {
- /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */
- .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
- V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
- /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */
- .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
- .shift = V4 (0x1.8p23f),
- .exponent_bias = V4 (0x3f800000),
+ .d = V_EXPM1F_DATA,
#if !WANT_SIMD_EXCEPT
/* Value above which expm1f(x) should overflow. Absolute value of the
underflow bound is greater than this, so it catches both cases - there is
@@ -55,67 +46,38 @@ static const struct data
#define TinyBound v_u32 (0x34000000 << 1)
static float32x4_t VPCS_ATTR NOINLINE
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t special, const struct data *d)
{
- return v_call_f32 (expm1f, x, y, special);
+ return v_call_f32 (
+ expm1f, x, expm1f_inline (v_zerofy_f32 (x, special), &d->d), special);
}
/* Single-precision vector exp(x) - 1 function.
- The maximum error is 1.51 ULP:
- _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2
- want 0x1.e2fb94p-2. */
+ The maximum error is 1.62 ULP:
+ _ZGVnN4v_expm1f(0x1.85f83p-2) got 0x1.da9f4p-2
+ want 0x1.da9f44p-2. */
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
- uint32x4_t ix = vreinterpretq_u32_f32 (x);
#if WANT_SIMD_EXCEPT
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
/* If fp exceptions are to be triggered correctly, fall back to scalar for
|x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for
shift-left by 1, and compare with thresh which was left-shifted offline -
this is effectively an absolute compare. */
uint32x4_t special
= vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh);
- if (__glibc_unlikely (v_any_u32 (special)))
- x = v_zerofy_f32 (x, special);
#else
/* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */
uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
#endif
- /* Reduce argument to smaller range:
- Let i = round(x / ln2)
- and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
- exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
- where 2^i is exact because i is an integer. */
- float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
- float32x4_t j
- = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift);
- int32x4_t i = vcvtq_s32_f32 (j);
- float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1);
- f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2);
-
- /* Approximate expm1(f) using polynomial.
- Taylor expansion for expm1(x) has the form:
- x + ax^2 + bx^3 + cx^4 ....
- So we calculate the polynomial P(f) = a + bf + cf^2 + ...
- and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
- float32x4_t p = v_horner_4_f32 (f, d->poly);
- p = vfmaq_f32 (f, vmulq_f32 (f, f), p);
-
- /* Assemble the result.
- expm1(x) ~= 2^i * (p + 1) - 1
- Let t = 2^i. */
- int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
- float32x4_t t = vreinterpretq_f32_s32 (u);
-
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (vreinterpretq_f32_u32 (ix),
- vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t),
- special);
+ return special_case (x, special, d);
/* expm1(x) ~= p * t + (t - 1). */
- return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+ return expm1f_inline (x, &d->d);
}
libmvec_hidden_def (V_NAME_F1 (expm1))
HALF_WIDTH_ALIAS_F1 (expm1)
diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c
index 6bb7482dc28795c1..c6ed7598e7deca1b 100644
--- a/sysdeps/aarch64/fpu/sinhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c
@@ -23,15 +23,13 @@
static const struct data
{
struct v_expm1f_data expm1f_consts;
- uint32x4_t halff;
#if WANT_SIMD_EXCEPT
uint32x4_t tiny_bound, thresh;
#else
- uint32x4_t oflow_bound;
+ float32x4_t oflow_bound;
#endif
} data = {
.expm1f_consts = V_EXPM1F_DATA,
- .halff = V4 (0x3f000000),
#if WANT_SIMD_EXCEPT
/* 0x1.6a09e8p-32, below which expm1f underflows. */
.tiny_bound = V4 (0x2fb504f4),
@@ -39,14 +37,15 @@ static const struct data
.thresh = V4 (0x12fbbbb3),
#else
/* 0x1.61814ep+6, above which expm1f helper overflows. */
- .oflow_bound = V4 (0x42b0c0a7),
+ .oflow_bound = V4 (0x1.61814ep+6),
#endif
};
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t t, float32x4_t halfsign,
+ uint32x4_t special)
{
- return v_call_f32 (sinhf, x, y, special);
+ return v_call_f32 (sinhf, x, vmulq_f32 (t, halfsign), special);
}
/* Approximation for vector single-precision sinh(x) using expm1.
@@ -60,15 +59,15 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
uint32x4_t ix = vreinterpretq_u32_f32 (x);
float32x4_t ax = vabsq_f32 (x);
- uint32x4_t iax = vreinterpretq_u32_f32 (ax);
- uint32x4_t sign = veorq_u32 (ix, iax);
- float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff));
+ float32x4_t halfsign = vreinterpretq_f32_u32 (
+ vbslq_u32 (v_u32 (0x80000000), ix, vreinterpretq_u32_f32 (v_f32 (0.5))));
#if WANT_SIMD_EXCEPT
- uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh);
+ uint32x4_t special = vcgeq_u32 (
+ vsubq_u32 (vreinterpretq_u32_f32 (ax), d->tiny_bound), d->thresh);
ax = v_zerofy_f32 (ax, special);
#else
- uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound);
+ uint32x4_t special = vcageq_f32 (x, d->oflow_bound);
#endif
/* Up to the point that expm1f overflows, we can use it to calculate sinhf
@@ -80,7 +79,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
/* Fall back to the scalar variant for any lanes that should trigger an
exception. */
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (x, vmulq_f32 (t, halfsign), special);
+ return special_case (x, t, halfsign, special);
return vmulq_f32 (t, halfsign);
}
diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c
index 50defd6ef03926f4..3ced9b7a414c812c 100644
--- a/sysdeps/aarch64/fpu/tanhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c
@@ -28,13 +28,16 @@ static const struct data
/* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
.boring_bound = V4 (0x41102cb3),
.large_bound = V4 (0x7f800000),
- .onef = V4 (0x3f800000),
};
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t is_boring, float32x4_t boring,
+ float32x4_t q, uint32x4_t special)
{
- return v_call_f32 (tanhf, x, y, special);
+ return v_call_f32 (
+ tanhf, x,
+ vbslq_f32 (is_boring, boring, vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)))),
+ special);
}
/* Approximation for single-precision vector tanh(x), using a simplified
@@ -50,7 +53,9 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
uint32x4_t iax = vreinterpretq_u32_f32 (ax);
uint32x4_t sign = veorq_u32 (ix, iax);
uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound);
- float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef));
+ /* expm1 exponent bias is 1.0f reinterpreted to int. */
+ float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (
+ sign, vreinterpretq_u32_s32 (d->expm1f_consts.exponent_bias)));
#if WANT_SIMD_EXCEPT
/* If fp exceptions are to be triggered properly, set all special and boring
@@ -66,10 +71,12 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
/* tanh(x) = (e^2x - 1) / (e^2x + 1). */
float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts);
- float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
+
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (vreinterpretq_f32_u32 (ix),
- vbslq_f32 (is_boring, boring, y), special);
+ return special_case (vreinterpretq_f32_u32 (ix), is_boring, boring, q,
+ special);
+
+ float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
return vbslq_f32 (is_boring, boring, y);
}
libmvec_hidden_def (V_NAME_F1 (tanh))
diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h
index 59b552da6b74785e..1daedfdd51cfc54b 100644
--- a/sysdeps/aarch64/fpu/v_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h
@@ -21,48 +21,47 @@
#define AARCH64_FPU_V_EXPM1F_INLINE_H
#include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "math_config.h"
struct v_expm1f_data
{
- float32x4_t poly[5];
- float invln2_and_ln2[4];
- float32x4_t shift;
+ float32x4_t c0, c2;
int32x4_t exponent_bias;
+ float c1, c3, inv_ln2, c4;
+ float ln2_hi, ln2_lo;
};
/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
- log(2)/2]. Exponent bias is asuint(1.0f).
- invln2_and_ln2 Stores constants: invln2, ln2_lo, ln2_hi, 0. */
+ log(2)/2]. Exponent bias is asuint(1.0f). */
#define V_EXPM1F_DATA \
{ \
- .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \
- V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \
- .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \
- .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ .c0 = V4 (0x1.fffffep-2), .c1 = 0x1.5554aep-3, .c2 = V4 (0x1.555736p-5), \
+ .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \
+ .exponent_bias = V4 (0x3f800000), .inv_ln2 = 0x1.715476p+0f, \
+ .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \
}
static inline float32x4_t
expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
{
- /* Helper routine for calculating exp(x) - 1.
- Copied from v_expm1f_1u6.c, with all special-case handling removed - the
- calling routine should handle special values if required. */
+ /* Helper routine for calculating exp(x) - 1. */
+
+ float32x2_t ln2 = vld1_f32 (&d->ln2_hi);
+ float32x4_t lane_consts = vld1q_f32 (&d->c1);
/* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
- float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
- float32x4_t j
- = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift);
+ float32x4_t j = vrndaq_f32 (vmulq_laneq_f32 (x, lane_consts, 2));
int32x4_t i = vcvtq_s32_f32 (j);
- float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1);
- f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2);
+ float32x4_t f = vfmsq_lane_f32 (x, j, ln2, 0);
+ f = vfmsq_lane_f32 (f, j, ln2, 1);
- /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
- Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses
- Horner. */
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). */
float32x4_t f2 = vmulq_f32 (f, f);
float32x4_t f4 = vmulq_f32 (f2, f2);
- float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly);
+ float32x4_t p01 = vfmaq_laneq_f32 (d->c0, f, lane_consts, 0);
+ float32x4_t p23 = vfmaq_laneq_f32 (d->c2, f, lane_consts, 1);
+ float32x4_t p = vfmaq_f32 (p01, f2, p23);
+ p = vfmaq_laneq_f32 (p, f4, lane_consts, 3);
p = vfmaq_f32 (f, f2, p);
/* t = 2^i. */

495
glibc-RHEL-118273-14.patch Normal file
View File

@ -0,0 +1,495 @@
commit 5bc100bd4b7e00db3009ae93d25d303341545d23
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon Sep 23 15:32:14 2024 +0100
AArch64: Improve codegen in users of AdvSIMD log1pf helper
log1pf is quite register-intensive - use fewer registers for the
polynomial, and make various changes to shorten dependency chains in
parent routines. There is now no spilling with GCC 14. Accuracy moves
around a little - comments adjusted accordingly but does not require
regen-ulps.
Use the helper in log1pf as well, instead of having separate
implementations. The more accurate polynomial means special-casing can
be simplified, and the shorter dependency chain avoids the usual dance
around v0, which is otherwise difficult.
There is a small duplication of vectors containing 1.0f (or 0x3f800000) -
GCC is not currently able to efficiently handle values which fit in FMOV
but not MOVI, and are reinterpreted to integer. There may be potential
for more optimisation if this is fixed.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Conflicts:
sysdeps/aarch64/fpu/log1pf_advsimd.c
(Fixup context to apply without out-of-scope dependency 751a5502)
diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c
index 8916dcbf409922a9..004474acf9e9322b 100644
--- a/sysdeps/aarch64/fpu/acoshf_advsimd.c
+++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c
@@ -25,35 +25,32 @@ const static struct data
{
struct v_log1pf_data log1pf_consts;
uint32x4_t one;
- uint16x4_t thresh;
-} data = {
- .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
- .one = V4 (0x3f800000),
- .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */
-};
+} data = { .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE, .one = V4 (0x3f800000) };
+
+#define Thresh vdup_n_u16 (0x2000) /* top(asuint(SquareLim) - asuint(1)). */
static float32x4_t NOINLINE VPCS_ATTR
special_case (float32x4_t x, float32x4_t y, uint16x4_t special,
- const struct v_log1pf_data d)
+ const struct v_log1pf_data *d)
{
return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special));
}
/* Vector approximation for single-precision acosh, based on log1p. Maximum
error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
- is 2.78 ULP:
- __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
- want 0x1.ef9ea2p-3.
+ is 3.00 ULP:
+ _ZGVnN4v_acoshf(0x1.01df3ap+0) got 0x1.ef0a82p-4
+ want 0x1.ef0a7cp-4.
With exceptions disabled, we can compute u with a shorter dependency chain,
- which gives maximum error of 3.07 ULP:
- __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
- want 0x1.fbc7f4p-4. */
+ which gives maximum error of 3.22 ULP:
+ _ZGVnN4v_acoshf(0x1.007ef2p+0) got 0x1.fdcdccp-5
+ want 0x1.fdcdd2p-5. */
VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
uint32x4_t ix = vreinterpretq_u32_f32 (x);
- uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh);
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), Thresh);
#if WANT_SIMD_EXCEPT
/* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
@@ -64,15 +61,16 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p);
float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1);
#else
- float32x4_t xm1 = vsubq_f32 (x, v_f32 (1));
- float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f)));
+ float32x4_t xm1 = vsubq_f32 (x, vreinterpretq_f32_u32 (d->one));
+ float32x4_t u
+ = vmulq_f32 (xm1, vaddq_f32 (x, vreinterpretq_f32_u32 (d->one)));
#endif
float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u));
if (__glibc_unlikely (v_any_u16h (special)))
- return special_case (x, y, special, d->log1pf_consts);
- return log1pf_inline (y, d->log1pf_consts);
+ return special_case (x, y, special, &d->log1pf_consts);
+ return log1pf_inline (y, &d->log1pf_consts);
}
libmvec_hidden_def (V_NAME_F1 (acosh))
HALF_WIDTH_ALIAS_F1 (acosh)
diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c
index 09fd8a614305563d..eb789b91b600af52 100644
--- a/sysdeps/aarch64/fpu/asinhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c
@@ -20,16 +20,16 @@
#include "v_math.h"
#include "v_log1pf_inline.h"
-#define SignMask v_u32 (0x80000000)
-
const static struct data
{
struct v_log1pf_data log1pf_consts;
+ float32x4_t one;
uint32x4_t big_bound;
#if WANT_SIMD_EXCEPT
uint32x4_t tiny_bound;
#endif
} data = {
+ .one = V4 (1),
.log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
.big_bound = V4 (0x5f800000), /* asuint(0x1p64). */
#if WANT_SIMD_EXCEPT
@@ -38,20 +38,27 @@ const static struct data
};
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t sign, float32x4_t y,
+ uint32x4_t special, const struct data *d)
{
- return v_call_f32 (asinhf, x, y, special);
+ return v_call_f32 (
+ asinhf, x,
+ vreinterpretq_f32_u32 (veorq_u32 (
+ sign, vreinterpretq_u32_f32 (log1pf_inline (y, &d->log1pf_consts)))),
+ special);
}
/* Single-precision implementation of vector asinh(x), using vector log1p.
- Worst-case error is 2.66 ULP, at roughly +/-0.25:
- __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */
+ Worst-case error is 2.59 ULP:
+ _ZGVnN4v_asinhf(0x1.d86124p-3) got 0x1.d449bep-3
+ want 0x1.d449c4p-3. */
VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
{
const struct data *dat = ptr_barrier (&data);
- uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask);
- float32x4_t ax = vreinterpretq_f32_u32 (iax);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
+ uint32x4_t sign = veorq_u32 (vreinterpretq_u32_f32 (x), iax);
float32x4_t special_arg = x;
#if WANT_SIMD_EXCEPT
@@ -68,13 +75,13 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
/* asinh(x) = log(x + sqrt(x * x + 1)).
For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
float32x4_t d
- = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x)));
- float32x4_t y = log1pf_inline (
- vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts);
+ = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (dat->one, ax, ax)));
+ float32x4_t y = vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d));
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (special_arg, vbslq_f32 (SignMask, x, y), special);
- return vbslq_f32 (SignMask, x, y);
+ return special_case (special_arg, sign, y, special, dat);
+ return vreinterpretq_f32_u32 (veorq_u32 (
+ sign, vreinterpretq_u32_f32 (log1pf_inline (y, &dat->log1pf_consts))));
}
libmvec_hidden_def (V_NAME_F1 (asinh))
HALF_WIDTH_ALIAS_F1 (asinh)
diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c
index ae488f7b54ddce26..818b6c92adcd48bb 100644
--- a/sysdeps/aarch64/fpu/atanhf_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
@@ -40,15 +40,17 @@ const static struct data
#define Half v_u32 (0x3f000000)
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t halfsign, float32x4_t y,
+ uint32x4_t special)
{
- return v_call_f32 (atanhf, x, y, special);
+ return v_call_f32 (atanhf, vbslq_f32 (AbsMask, x, halfsign),
+ vmulq_f32 (halfsign, y), special);
}
/* Approximation for vector single-precision atanh(x) using modified log1p.
- The maximum error is 3.08 ULP:
- __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
- want 0x1.ffcb82p-5. */
+ The maximum error is 2.93 ULP:
+ _ZGVnN4v_atanhf(0x1.f43d7p-5) got 0x1.f4dcfep-5
+ want 0x1.f4dcf8p-5. */
VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
@@ -68,11 +70,19 @@ VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
uint32x4_t special = vcgeq_u32 (iax, d->one);
#endif
- float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
- y = log1pf_inline (y, d->log1pf_consts);
+ float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax),
+ vsubq_f32 (vreinterpretq_f32_u32 (d->one), ax));
+ y = log1pf_inline (y, &d->log1pf_consts);
+ /* If exceptions not required, pass ax to special-case for shorter dependency
+ chain. If exceptions are required ax will have been zerofied, so have to
+ pass x. */
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (x, vmulq_f32 (halfsign, y), special);
+#if WANT_SIMD_EXCEPT
+ return special_case (x, halfsign, y, special);
+#else
+ return special_case (ax, halfsign, y, special);
+#endif
return vmulq_f32 (halfsign, y);
}
libmvec_hidden_def (V_NAME_F1 (atanh))
diff --git a/sysdeps/aarch64/fpu/log1pf_advsimd.c b/sysdeps/aarch64/fpu/log1pf_advsimd.c
index dc15334a8537b1fc..f2d47962fe13fbdd 100644
--- a/sysdeps/aarch64/fpu/log1pf_advsimd.c
+++ b/sysdeps/aarch64/fpu/log1pf_advsimd.c
@@ -18,113 +18,78 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f32.h"
+#include "v_log1pf_inline.h"
+
+#if WANT_SIMD_EXCEPT
const static struct data
{
- float32x4_t poly[8], ln2;
- uint32x4_t tiny_bound, minus_one, four, thresh;
- int32x4_t three_quarters;
+ uint32x4_t minus_one, thresh;
+ struct v_log1pf_data d;
} data = {
- .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients
- (1, -0.5) are not stored as they can be generated more
- efficiently. */
- V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),
- V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f),
- V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) },
- .ln2 = V4 (0x1.62e43p-1f),
- .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */
- .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */
+ .d = V_LOG1PF_CONSTANTS_TABLE,
+ .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - TinyBound. */
.minus_one = V4 (0xbf800000),
- .four = V4 (0x40800000),
- .three_quarters = V4 (0x3f400000)
};
-static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *p)
-{
- /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. */
- float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]);
- float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]);
- float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]);
- float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]);
-
- float32x4_t m2 = vmulq_f32 (m, m);
- float32x4_t p_02 = vfmaq_f32 (m, m2, p_12);
- float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56);
- float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]);
-
- float32x4_t m4 = vmulq_f32 (m2, m2);
- float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36);
- return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79));
-}
+/* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */
+# define TinyBound v_u32 (0x34000000)
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, uint32x4_t cmp, const struct data *d)
{
- return v_call_f32 (log1pf, x, y, special);
+ /* Side-step special lanes so fenv exceptions are not triggered
+ inadvertently. */
+ float32x4_t x_nospecial = v_zerofy_f32 (x, cmp);
+ return v_call_f32 (log1pf, x, log1pf_inline (x_nospecial, &d->d), cmp);
}
-/* Vector log1pf approximation using polynomial on reduced interval. Accuracy
- is roughly 2.02 ULP:
- log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+ error is 1.69 ULP:
+ _ZGVnN4v_log1pf(0x1.04418ap-2) got 0x1.cfcbd8p-3
+ want 0x1.cfcbdcp-3. */
VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
-
uint32x4_t ix = vreinterpretq_u32_f32 (x);
uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
+
uint32x4_t special_cases
- = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh),
+ = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, TinyBound), d->thresh),
vcgeq_u32 (ix, d->minus_one));
- float32x4_t special_arg = x;
-#if WANT_SIMD_EXCEPT
if (__glibc_unlikely (v_any_u32 (special_cases)))
- /* Side-step special lanes so fenv exceptions are not triggered
- inadvertently. */
- x = v_zerofy_f32 (x, special_cases);
-#endif
+ return special_case (x, special_cases, d);
- /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
- is in [-0.25, 0.5]):
- log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
- We approximate log1p(m) with a polynomial, then scale by
- k*log(2). Instead of doing this directly, we use an intermediate
- scale factor s = 4*k*log(2) to ensure the scale is representable
- as a normalised fp32 number. */
+ return log1pf_inline (x, &d->d);
+}
- float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+#else
- /* Choose k to scale x to the range [-1/4, 1/2]. */
- int32x4_t k
- = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
- v_s32 (0xff800000));
- uint32x4_t ku = vreinterpretq_u32_s32 (k);
+const static struct v_log1pf_data data = V_LOG1PF_CONSTANTS_TABLE;
- /* Scale x by exponent manipulation. */
- float32x4_t m_scale
- = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, uint32x4_t cmp)
+{
+ return v_call_f32 (log1pf, x, log1pf_inline (x, ptr_barrier (&data)), cmp);
+}
- /* Scale up to ensure that the scale factor is representable as normalised
- fp32 number, and scale m down accordingly. */
- float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
- m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+ error is 1.63 ULP:
+ _ZGVnN4v_log1pf(0x1.216d12p-2) got 0x1.fdcb12p-3
+ want 0x1.fdcb16p-3. */
+VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
+{
+ uint32x4_t special_cases = vornq_u32 (vcleq_f32 (x, v_f32 (-1)),
+ vcaleq_f32 (x, v_f32 (0x1p127f)));
- /* Evaluate polynomial on the reduced interval. */
- float32x4_t p = eval_poly (m_scale, d->poly);
+ if (__glibc_unlikely (v_any_u32 (special_cases)))
+ return special_case (x, special_cases);
- /* The scale factor to be applied back at the end - by multiplying float(k)
- by 2^-23 we get the unbiased exponent of k. */
- float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23));
+ return log1pf_inline (x, ptr_barrier (&data));
+}
- /* Apply the scaling back. */
- float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2);
+#endif
- if (__glibc_unlikely (v_any_u32 (special_cases)))
- return special_case (special_arg, y, special_cases);
- return y;
-}
libmvec_hidden_def (V_NAME_F1 (log1p))
HALF_WIDTH_ALIAS_F1 (log1p)
diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h
index 643a6cdcfc498970..73e45a942e24a26f 100644
--- a/sysdeps/aarch64/fpu/v_log1pf_inline.h
+++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h
@@ -25,54 +25,81 @@
struct v_log1pf_data
{
- float32x4_t poly[8], ln2;
uint32x4_t four;
int32x4_t three_quarters;
+ float c0, c3, c5, c7;
+ float32x4_t c4, c6, c1, c2, ln2;
};
/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
(1, -0.5) are not stored as they can be generated more efficiently. */
#define V_LOG1PF_CONSTANTS_TABLE \
{ \
- .poly \
- = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \
- V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \
- V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \
- .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \
- .three_quarters = V4 (0x3f400000) \
+ .c0 = 0x1.5555aap-2f, .c1 = V4 (-0x1.000038p-2f), \
+ .c2 = V4 (0x1.99675cp-3f), .c3 = -0x1.54ef78p-3f, \
+ .c4 = V4 (0x1.28a1f4p-3f), .c5 = -0x1.0da91p-3f, \
+ .c6 = V4 (0x1.abcb6p-4f), .c7 = -0x1.6f0d5ep-5f, \
+ .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \
+ .three_quarters = V4 (0x3f400000) \
}
static inline float32x4_t
-eval_poly (float32x4_t m, const float32x4_t *c)
+eval_poly (float32x4_t m, const struct v_log1pf_data *d)
{
- /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine
- uses split Estrin, but this way reduces register pressure in the calling
- routine). */
- float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]);
+ /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. */
+ float32x4_t c0357 = vld1q_f32 (&d->c0);
+ float32x4_t q = vfmaq_laneq_f32 (v_f32 (-0.5), m, c0357, 0);
float32x4_t m2 = vmulq_f32 (m, m);
- q = vfmaq_f32 (m, m2, q);
- float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1);
+ float32x4_t p67 = vfmaq_laneq_f32 (d->c6, m, c0357, 3);
+ float32x4_t p45 = vfmaq_laneq_f32 (d->c4, m, c0357, 2);
+ float32x4_t p23 = vfmaq_laneq_f32 (d->c2, m, c0357, 1);
+ float32x4_t p = vfmaq_f32 (p45, m2, p67);
+ p = vfmaq_f32 (p23, m2, p);
+ p = vfmaq_f32 (d->c1, m, p);
p = vmulq_f32 (m2, p);
- return vfmaq_f32 (q, m2, p);
+ p = vfmaq_f32 (m, m2, p);
+ return vfmaq_f32 (p, m2, q);
}
static inline float32x4_t
-log1pf_inline (float32x4_t x, const struct v_log1pf_data d)
+log1pf_inline (float32x4_t x, const struct v_log1pf_data *d)
{
- /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
- special-case handling. See that file for details of the algorithm. */
+ /* Helper for calculating log(x + 1). */
+
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+ We approximate log1p(m) with a polynomial, then scale by
+ k*log(2). Instead of doing this directly, we use an intermediate
+ scale factor s = 4*k*log(2) to ensure the scale is representable
+ as a normalised fp32 number. */
float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+
+ /* Choose k to scale x to the range [-1/4, 1/2]. */
int32x4_t k
- = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters),
+ = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
v_s32 (0xff800000));
uint32x4_t ku = vreinterpretq_u32_s32 (k);
- float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku));
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number, and scale m down accordingly. */
+ float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
+
+ /* Scale x by exponent manipulation. */
float32x4_t m_scale
= vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
- float32x4_t p = eval_poly (m_scale, d.poly);
+
+ /* Evaluate polynomial on the reduced interval. */
+ float32x4_t p = eval_poly (m_scale, d);
+
+ /* The scale factor to be applied back at the end - by multiplying float(k)
+ by 2^-23 we get the unbiased exponent of k. */
float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
- return vfmaq_f32 (p, scale_back, d.ln2);
+
+ /* Apply the scaling back. */
+ return vfmaq_f32 (p, scale_back, d->ln2);
}
#endif

261
glibc-RHEL-118273-15.patch Normal file
View File

@ -0,0 +1,261 @@
commit a15b1394b5eba98ffe28a02a392b587e4fe13c0d
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon Sep 23 15:30:20 2024 +0100
AArch64: Improve codegen in SVE F32 logs
Reduce MOVPRFXs by using unpredicated (non-destructive) instructions
where possible. Similar to the recent change to AdvSIMD F32 logs,
adjust special-case arguments and bounds to allow for more optimal
register usage. For all 3 routines one MOVPRFX remains in the
reduction, which cannot be avoided as immediate AND and ASR are both
destructive.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c
index bdbb49cd32feccb4..7913679f6795502a 100644
--- a/sysdeps/aarch64/fpu/log10f_sve.c
+++ b/sysdeps/aarch64/fpu/log10f_sve.c
@@ -24,6 +24,7 @@ static const struct data
float poly_0246[4];
float poly_1357[4];
float ln2, inv_ln10;
+ uint32_t off, lower;
} data = {
.poly_1357 = {
/* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -35,18 +36,23 @@ static const struct data
-0x1.0fc92cp-4f },
.ln2 = 0x1.62e43p-1f,
.inv_ln10 = 0x1.bcb7b2p-2f,
+ .off = 0x3f2aaaab,
+ /* Lower bound is the smallest positive normal float 0x00800000. For
+ optimised register use subnormals are detected after offset has been
+ subtracted, so lower bound is 0x0080000 - offset (which wraps around). */
+ .lower = 0x00800000 - 0x3f2aaaab
};
-#define Min 0x00800000
-#define Max 0x7f800000
-#define Thres 0x7f000000 /* Max - Min. */
-#define Offset 0x3f2aaaab /* 0.666667. */
+#define Thres 0x7f000000 /* asuint32(inf) - 0x00800000. */
#define MantissaMask 0x007fffff
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+ svbool_t cmp)
{
- return sv_call_f32 (log10f, x, y, special);
+ return sv_call_f32 (
+ log10f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+ svmla_x (svptrue_b32 (), p, r2, y), cmp);
}
/* Optimised implementation of SVE log10f using the same algorithm and
@@ -57,23 +63,25 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svuint32_t ix = svreinterpret_u32 (x);
- svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+ svuint32_t u_off = svreinterpret_u32 (x);
+
+ u_off = svsub_x (pg, u_off, d->off);
+ svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thres);
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
- ix = svsub_x (pg, ix, Offset);
svfloat32_t n = svcvt_f32_x (
- pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */
- ix = svand_x (pg, ix, MantissaMask);
- ix = svadd_x (pg, ix, Offset);
+ pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* signextend. */
+ svuint32_t ix = svand_x (pg, u_off, MantissaMask);
+ ix = svadd_x (pg, ix, d->off);
svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f);
/* y = log10(1+r) + n*log10(2)
log10(1+r) ~ r * InvLn(10) + P(r)
where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for
log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). */
- svfloat32_t r2 = svmul_x (pg, r, r);
- svfloat32_t r4 = svmul_x (pg, r2, r2);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+ svfloat32_t r4 = svmul_x (svptrue_b32 (), r2, r2);
svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0);
svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1);
@@ -88,7 +96,6 @@ svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
hi = svmul_x (pg, hi, d->inv_ln10);
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
- special);
- return svmla_x (pg, hi, r2, y);
+ return special_case (u_off, hi, r2, y, special);
+ return svmla_x (svptrue_b32 (), hi, r2, y);
}
diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c
index 5031c4248359295e..939d89bfb9a95a11 100644
--- a/sysdeps/aarch64/fpu/log2f_sve.c
+++ b/sysdeps/aarch64/fpu/log2f_sve.c
@@ -23,6 +23,7 @@ static const struct data
{
float poly_02468[5];
float poly_1357[4];
+ uint32_t off, lower;
} data = {
.poly_1357 = {
/* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
@@ -32,18 +33,23 @@ static const struct data
},
.poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f,
0x1.9d8ecap-3f, 0x1.9e495p-3f },
+ .off = 0x3f2aaaab,
+ /* Lower bound is the smallest positive normal float 0x00800000. For
+ optimised register use subnormals are detected after offset has been
+ subtracted, so lower bound is 0x0080000 - offset (which wraps around). */
+ .lower = 0x00800000 - 0x3f2aaaab
};
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thres (0x7f000000) /* Max - Min. */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */
#define MantissaMask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667. */
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+ svbool_t cmp)
{
- return sv_call_f32 (log2f, x, y, cmp);
+ return sv_call_f32 (
+ log2f, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+ svmla_x (svptrue_b32 (), p, r2, y), cmp);
}
/* Optimised implementation of SVE log2f, using the same algorithm
@@ -55,19 +61,20 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svuint32_t u = svreinterpret_u32 (x);
- svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres);
+ svuint32_t u_off = svreinterpret_u32 (x);
+
+ u_off = svsub_x (pg, u_off, d->off);
+ svbool_t special = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
- u = svsub_x (pg, u, Off);
svfloat32_t n = svcvt_f32_x (
- pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */
- u = svand_x (pg, u, MantissaMask);
- u = svadd_x (pg, u, Off);
+ pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */
+ svuint32_t u = svand_x (pg, u_off, MantissaMask);
+ u = svadd_x (pg, u, d->off);
svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
/* y = log2(1+r) + n. */
- svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
/* Evaluate polynomial using pairwise Horner scheme. */
svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
@@ -81,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
y = svmla_x (pg, q_01, r2, y);
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special);
- return svmla_x (pg, n, r, y);
+ return special_case (u_off, n, r, y, special);
+ return svmla_x (svptrue_b32 (), n, r, y);
}
diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
index d64e810cfec9aa19..5b9324678d99455b 100644
--- a/sysdeps/aarch64/fpu/logf_sve.c
+++ b/sysdeps/aarch64/fpu/logf_sve.c
@@ -24,6 +24,7 @@ static const struct data
float poly_0135[4];
float poly_246[3];
float ln2;
+ uint32_t off, lower;
} data = {
.poly_0135 = {
/* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
@@ -32,19 +33,24 @@ static const struct data
-0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
},
.poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
- .ln2 = 0x1.62e43p-1f
+ .ln2 = 0x1.62e43p-1f,
+ .off = 0x3f2aaaab,
+ /* Lower bound is the smallest positive normal float 0x00800000. For
+ optimised register use subnormals are detected after offset has been
+ subtracted, so lower bound is 0x0080000 - offset (which wraps around). */
+ .lower = 0x00800000 - 0x3f2aaaab
};
-#define Min (0x00800000)
-#define Max (0x7f800000)
-#define Thresh (0x7f000000) /* Max - Min. */
+#define Thresh (0x7f000000) /* asuint32(inf) - 0x00800000. */
#define Mask (0x007fffff)
-#define Off (0x3f2aaaab) /* 0.666667. */
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+special_case (svuint32_t u_off, svfloat32_t p, svfloat32_t r2, svfloat32_t y,
+ svbool_t cmp)
{
- return sv_call_f32 (logf, x, y, cmp);
+ return sv_call_f32 (
+ logf, svreinterpret_f32 (svadd_x (svptrue_b32 (), u_off, data.off)),
+ svmla_x (svptrue_b32 (), p, r2, y), cmp);
}
/* Optimised implementation of SVE logf, using the same algorithm and
@@ -55,19 +61,21 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svuint32_t u = svreinterpret_u32 (x);
- svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
+ svuint32_t u_off = svreinterpret_u32 (x);
+
+ u_off = svsub_x (pg, u_off, d->off);
+ svbool_t cmp = svcmpge (pg, svsub_x (pg, u_off, d->lower), Thresh);
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
- u = svsub_x (pg, u, Off);
svfloat32_t n = svcvt_f32_x (
- pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */
- u = svand_x (pg, u, Mask);
- u = svadd_x (pg, u, Off);
+ pg, svasr_x (pg, svreinterpret_s32 (u_off), 23)); /* Sign-extend. */
+
+ svuint32_t u = svand_x (pg, u_off, Mask);
+ u = svadd_x (pg, u, d->off);
svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
/* y = log(1+r) + n*ln2. */
- svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
/* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). */
svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
@@ -80,6 +88,6 @@ svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
p = svmla_x (pg, r, n, d->ln2);
if (__glibc_unlikely (svptest_any (pg, cmp)))
- return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
+ return special_case (u_off, p, r2, y, cmp);
return svmla_x (pg, p, r2, y);
}

467
glibc-RHEL-118273-16.patch Normal file
View File

@ -0,0 +1,467 @@
commit 7b8c134b5460ed933d610fa92ed1227372b68fdc
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon Sep 23 15:26:12 2024 +0100
AArch64: Improve codegen in SVE expf & related routines
Reduce MOV and MOVPRFX by improving special-case handling. Use inline
helper to duplicate the entire computation between the special- and
non-special case branches, removing the contention for z0 between x
and the return value.
Also rearrange some MLAs and MLSs - by making the multiplicand the
destination we can avoid a MOVPRFX in several cases. Also change which
constants go in the vector used for lanewise ops - the last lane is no
longer wasted.
Spotted that shift was incorrect in exp2f and exp10f, w.r.t. the
comment that explains it. Fixed - worst-case ULP for exp2f moves
around but it doesn't change significantly for either routine.
Worst-case error for coshf increases due to passing x to exp rather
than abs(x) - updated the comment, but does not require regen-ulps.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c
index e5d8a299c6aa7ceb..7ad6efa0fc218278 100644
--- a/sysdeps/aarch64/fpu/coshf_sve.c
+++ b/sysdeps/aarch64/fpu/coshf_sve.c
@@ -23,37 +23,42 @@
static const struct data
{
struct sv_expf_data expf_consts;
- uint32_t special_bound;
+ float special_bound;
} data = {
.expf_consts = SV_EXPF_DATA,
/* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
- .special_bound = 0x42ad496c,
+ .special_bound = 0x1.5a92d8p+6,
};
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
+ svbool_t pg)
{
- return sv_call_f32 (coshf, x, y, pg);
+ return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e),
+ pg);
}
/* Single-precision vector cosh, using vector expf.
- Maximum error is 1.89 ULP:
- _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127
- want 0x1.f00adcp+127. */
+ Maximum error is 2.77 ULP:
+ _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
+ want 0x1.e4594cp+2. */
svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svfloat32_t ax = svabs_x (pg, x);
- svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound);
+ svbool_t special = svacge (pg, x, d->special_bound);
- /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
- svfloat32_t t = expf_inline (ax, pg, &d->expf_consts);
- svfloat32_t half_t = svmul_x (pg, t, 0.5);
- svfloat32_t half_over_t = svdivr_x (pg, t, 0.5);
+ /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
+ Note that x is passed to exp here, rather than |x|. This is to avoid using
+ destructive unary ABS for better register usage. However it means the
+ routine is not exactly symmetrical, as the exp helper is slightly less
+ accurate in the negative range. */
+ svfloat32_t e = expf_inline (x, pg, &d->expf_consts);
+ svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5);
+ svfloat32_t half_over_e = svdivr_x (pg, e, 0.5);
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+ return special_case (x, half_e, half_over_e, special);
- return svadd_x (pg, half_t, half_over_t);
+ return svadd_x (svptrue_b32 (), half_e, half_over_e);
}
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index e09b2f3b2705515a..8aa3fa9c4335cfb8 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -18,74 +18,83 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f32.h"
-/* For x < -SpecialBound, the result is subnormal and not handled correctly by
+/* For x < -Thres, the result is subnormal and not handled correctly by
FEXPA. */
-#define SpecialBound 37.9
+#define Thres 37.9
static const struct data
{
- float poly[5];
- float shift, log10_2, log2_10_hi, log2_10_lo, special_bound;
+ float log2_10_lo, c0, c2, c4;
+ float c1, c3, log10_2;
+ float shift, log2_10_hi, thres;
} data = {
/* Coefficients generated using Remez algorithm with minimisation of relative
error.
rel error: 0x1.89dafa3p-24
abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
maxerr: 0.52 +0.5 ulp. */
- .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f,
- 0x1.12b41ap-1f },
+ .c0 = 0x1.26bb16p+1f,
+ .c1 = 0x1.5350d2p+1f,
+ .c2 = 0x1.04744ap+1f,
+ .c3 = 0x1.2d8176p+0f,
+ .c4 = 0x1.12b41ap-1f,
/* 1.5*2^17 + 127, a shift value suitable for FEXPA. */
- .shift = 0x1.903f8p17f,
+ .shift = 0x1.803f8p17f,
.log10_2 = 0x1.a934fp+1,
.log2_10_hi = 0x1.344136p-2,
.log2_10_lo = -0x1.ec10cp-27,
- .special_bound = SpecialBound,
+ .thres = Thres,
};
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+static inline svfloat32_t
+sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
{
- return sv_call_f32 (exp10f, x, y, special);
-}
-
-/* Single-precision SVE exp10f routine. Implements the same algorithm
- as AdvSIMD exp10f.
- Worst case error is 1.02 ULPs.
- _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
- want 0x1.ba5f9cp-1. */
-svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
-{
- const struct data *d = ptr_barrier (&data);
/* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
with poly(r) in [1/sqrt(2), sqrt(2)] and
x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */
- /* Load some constants in quad-word chunks to minimise memory access (last
- lane is wasted). */
- svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2);
+ svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
/* n = round(x/(log10(2)/N)). */
svfloat32_t shift = sv_f32 (d->shift);
- svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0);
- svfloat32_t n = svsub_x (pg, z, shift);
+ svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
+ svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
/* r = x - n*log10(2)/N. */
- svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1);
- r = svmls_lane (r, n, log10_2_and_inv, 2);
+ svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
+ r = svmls_lane (r, n, lane_consts, 0);
- svbool_t special = svacgt (pg, x, d->special_bound);
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
/* Polynomial evaluation: poly(r) ~ exp10(r)-1. */
- svfloat32_t r2 = svmul_x (pg, r, r);
- svfloat32_t poly
- = svmla_x (pg, svmul_x (pg, r, d->poly[0]),
- sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2);
-
- if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svmla_x (pg, scale, scale, poly), special);
+ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+ svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+ svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+ svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
+ svfloat32_t poly = svmla_x (pg, p0, r2, p14);
return svmla_x (pg, scale, scale, poly);
}
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+ return sv_call_f32 (exp10f, x, sv_exp10f_inline (x, svptrue_b32 (), d),
+ special);
+}
+
+/* Single-precision SVE exp10f routine. Implements the same algorithm
+ as AdvSIMD exp10f.
+ Worst case error is 1.02 ULPs.
+ _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
+ want 0x1.ba5f9cp-1. */
+svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svbool_t special = svacgt (pg, x, d->thres);
+ if (__glibc_unlikely (svptest_any (special, special)))
+ return special_case (x, special, d);
+ return sv_exp10f_inline (x, pg, d);
+}
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index 8a686e3e054cb7f5..c6216bed9e9e7538 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -24,54 +24,64 @@
static const struct data
{
- float poly[5];
+ float c0, c2, c4, c1, c3;
float shift, thres;
} data = {
- /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
- compatibility with polynomial helpers. */
- .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f,
- 0x1.59977ap-10f },
+ /* Coefficients copied from the polynomial in AdvSIMD variant. */
+ .c0 = 0x1.62e422p-1f,
+ .c1 = 0x1.ebf9bcp-3f,
+ .c2 = 0x1.c6bd32p-5f,
+ .c3 = 0x1.3ce9e4p-7f,
+ .c4 = 0x1.59977ap-10f,
/* 1.5*2^17 + 127. */
- .shift = 0x1.903f8p17f,
+ .shift = 0x1.803f8p17f,
/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
correctly by FEXPA. */
.thres = Thres,
};
-static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
-{
- return sv_call_f32 (exp2f, x, y, special);
-}
-
-/* Single-precision SVE exp2f routine. Implements the same algorithm
- as AdvSIMD exp2f.
- Worst case error is 1.04 ULPs.
- SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0
- want 0x1.ba7ebp+0. */
-svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+static inline svfloat32_t
+sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
{
- const struct data *d = ptr_barrier (&data);
/* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
x = n + r, with r in [-1/2, 1/2]. */
- svfloat32_t shift = sv_f32 (d->shift);
- svfloat32_t z = svadd_x (pg, x, shift);
- svfloat32_t n = svsub_x (pg, z, shift);
- svfloat32_t r = svsub_x (pg, x, n);
+ svfloat32_t z = svadd_x (svptrue_b32 (), x, d->shift);
+ svfloat32_t n = svsub_x (svptrue_b32 (), z, d->shift);
+ svfloat32_t r = svsub_x (svptrue_b32 (), x, n);
- svbool_t special = svacgt (pg, x, d->thres);
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
/* Polynomial evaluation: poly(r) ~ exp2(r)-1.
Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
coefficients 1 to 4, and apply most significant coefficient directly. */
- svfloat32_t r2 = svmul_x (pg, r, r);
- svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1);
- svfloat32_t p0 = svmul_x (pg, r, d->poly[0]);
+ svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
+ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
+ svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
+ svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
+ svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
svfloat32_t poly = svmla_x (pg, p0, r2, p14);
- if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svmla_x (pg, scale, scale, poly), special);
-
return svmla_x (pg, scale, scale, poly);
}
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, const struct data *d)
+{
+ return sv_call_f32 (exp2f, x, sv_exp2f_inline (x, svptrue_b32 (), d),
+ special);
+}
+
+/* Single-precision SVE exp2f routine. Implements the same algorithm
+ as AdvSIMD exp2f.
+ Worst case error is 1.04 ULPs.
+ _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
+ want 0x1.ba6a64p-1. */
+svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svbool_t special = svacgt (pg, x, d->thres);
+ if (__glibc_unlikely (svptest_any (special, special)))
+ return special_case (x, special, d);
+ return sv_exp2f_inline (x, pg, d);
+}
diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c
index 3ba79bc4f11a05f9..da93e01b87e0e890 100644
--- a/sysdeps/aarch64/fpu/expf_sve.c
+++ b/sysdeps/aarch64/fpu/expf_sve.c
@@ -18,33 +18,25 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
+#include "sv_expf_inline.h"
+
+/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+ correctly by FEXPA. */
+#define Thres 0x1.5d5e2ap+6f
static const struct data
{
- float poly[5];
- float inv_ln2, ln2_hi, ln2_lo, shift, thres;
+ struct sv_expf_data d;
+ float thres;
} data = {
- /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
- compatibility with polynomial helpers. */
- .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f,
- 0x1.0e4020p-7f },
- .inv_ln2 = 0x1.715476p+0f,
- .ln2_hi = 0x1.62e4p-1f,
- .ln2_lo = 0x1.7f7d1cp-20f,
- /* 1.5*2^17 + 127. */
- .shift = 0x1.903f8p17f,
- /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
- correctly by FEXPA. */
- .thres = 0x1.5d5e2ap+6f,
+ .d = SV_EXPF_DATA,
+ .thres = Thres,
};
-#define C(i) sv_f32 (d->poly[i])
-#define ExponentBias 0x3f800000
-
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
{
- return sv_call_f32 (expf, x, y, special);
+ return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special);
}
/* Optimised single-precision SVE exp function.
@@ -54,36 +46,8 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
-
- /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
- x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
-
- /* Load some constants in quad-word chunks to minimise memory access (last
- lane is wasted). */
- svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2);
-
- /* n = round(x/(ln2/N)). */
- svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0);
- svfloat32_t n = svsub_x (pg, z, d->shift);
-
- /* r = x - n*ln2/N. */
- svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1);
- r = svmls_lane (r, n, invln2_and_ln2, 2);
-
- /* scale = 2^(n/N). */
svbool_t is_special_case = svacgt (pg, x, d->thres);
- svfloat32_t scale = svexpa (svreinterpret_u32 (z));
-
- /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
- svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
- svfloat32_t p34 = svmla_x (pg, C (3), C (4), r);
- svfloat32_t r2 = svmul_x (pg, r, r);
- svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
- svfloat32_t p0 = svmul_x (pg, r, C (0));
- svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
if (__glibc_unlikely (svptest_any (pg, is_special_case)))
- return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case);
-
- return svmla_x (pg, scale, scale, poly);
+ return special_case (x, is_special_case, &d->d);
+ return expf_inline (x, pg, &d->d);
}
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
index 23963b5f8ec89ead..6166df65533555a6 100644
--- a/sysdeps/aarch64/fpu/sv_expf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -24,19 +24,20 @@
struct sv_expf_data
{
- float poly[5];
- float inv_ln2, ln2_hi, ln2_lo, shift;
+ float c1, c3, inv_ln2;
+ float ln2_lo, c0, c2, c4;
+ float ln2_hi, shift;
};
/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */
#define SV_EXPF_DATA \
{ \
- .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \
- 0x1.0e4020p-7f }, \
- \
- .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
- .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \
+ /* Coefficients copied from the polynomial in AdvSIMD variant. */ \
+ .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \
+ .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \
+ .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \
+ .shift = 0x1.803f8p17f, \
}
#define C(i) sv_f32 (d->poly[i])
@@ -47,26 +48,25 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
- /* Load some constants in quad-word chunks to minimise memory access. */
- svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]);
+ svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo);
/* n = round(x/(ln2/N)). */
- svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1);
+ svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift);
svfloat32_t n = svsub_x (pg, z, d->shift);
/* r = x - n*ln2/N. */
- svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2);
- r = svmls_lane (r, n, c4_invln2_and_ln2, 3);
+ svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x);
+ r = svmls_lane (r, n, lane_consts, 0);
/* scale = 2^(n/N). */
- svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z));
+ svfloat32_t scale = svexpa (svreinterpret_u32 (z));
/* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
- svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
- svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0);
- svfloat32_t r2 = svmul_f32_x (pg, r, r);
+ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
+ svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
- svfloat32_t p0 = svmul_f32_x (pg, r, C (0));
+ svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
svfloat32_t poly = svmla_x (pg, p0, r2, p14);
return svmla_x (pg, scale, scale, poly);

124
glibc-RHEL-118273-17.patch Normal file
View File

@ -0,0 +1,124 @@
commit 1cf29fbc5be23db775d1dfa6b332ded6e6554252
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Mon Oct 28 14:58:35 2024 +0000
AArch64: Small optimisation in AdvSIMD erf and erfc
In both routines, reduce register pressure such that GCC 14 emits no
spills for erf and fewer spills for erfc. Also use more efficient
comparison for the special-case in erf.
Benchtests show erf improves by 6.4%, erfc by 1.0%.
diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c
index 19cbb7d0f42eb4e2..c0116735e408066d 100644
--- a/sysdeps/aarch64/fpu/erf_advsimd.c
+++ b/sysdeps/aarch64/fpu/erf_advsimd.c
@@ -22,19 +22,21 @@
static const struct data
{
float64x2_t third;
- float64x2_t tenth, two_over_five, two_over_fifteen;
- float64x2_t two_over_nine, two_over_fortyfive;
+ float64x2_t tenth, two_over_five, two_over_nine;
+ double two_over_fifteen, two_over_fortyfive;
float64x2_t max, shift;
+ uint64x2_t max_idx;
#if WANT_SIMD_EXCEPT
float64x2_t tiny_bound, huge_bound, scale_minus_one;
#endif
} data = {
+ .max_idx = V2 (768),
.third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */
- .two_over_fifteen = V2 (0x1.1111111111111p-3),
+ .two_over_fifteen = 0x1.1111111111111p-3,
.tenth = V2 (-0x1.999999999999ap-4),
.two_over_five = V2 (-0x1.999999999999ap-2),
.two_over_nine = V2 (-0x1.c71c71c71c71cp-3),
- .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5),
+ .two_over_fortyfive = 0x1.6c16c16c16c17p-5,
.max = V2 (5.9921875), /* 6 - 1/128. */
.shift = V2 (0x1p45),
#if WANT_SIMD_EXCEPT
@@ -87,8 +89,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
float64x2_t a = vabsq_f64 (x);
/* Reciprocal conditions that do not catch NaNs so they can be used in BSLs
to return expected results. */
- uint64x2_t a_le_max = vcleq_f64 (a, dat->max);
- uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max);
+ uint64x2_t a_le_max = vcaleq_f64 (x, dat->max);
+ uint64x2_t a_gt_max = vcagtq_f64 (x, dat->max);
#if WANT_SIMD_EXCEPT
/* |x| huge or tiny. */
@@ -115,7 +117,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
segfault. */
uint64x2_t i
= vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift));
- i = vbslq_u64 (a_le_max, i, v_u64 (768));
+ i = vbslq_u64 (a_le_max, i, dat->max_idx);
struct entry e = lookup (i);
float64x2_t r = vsubq_f64 (z, shift);
@@ -125,14 +127,19 @@ float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
float64x2_t d2 = vmulq_f64 (d, d);
float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t two_over_fifteen_and_fortyfive
+ = vld1q_f64 (&dat->two_over_fifteen);
+
/* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */
float64x2_t p1 = r;
float64x2_t p2
= vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third));
float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third));
- float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen);
+ float64x2_t p4 = vfmaq_laneq_f64 (dat->two_over_five, r2,
+ two_over_fifteen_and_fortyfive, 0);
p4 = vfmsq_f64 (dat->tenth, r2, p4);
- float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive);
+ float64x2_t p5 = vfmaq_laneq_f64 (dat->two_over_nine, r2,
+ two_over_fifteen_and_fortyfive, 1);
p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5));
float64x2_t p34 = vfmaq_f64 (p3, d, p4);
diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c
index f1b3bfe8304c73b5..2f2f755c46e71b58 100644
--- a/sysdeps/aarch64/fpu/erfc_advsimd.c
+++ b/sysdeps/aarch64/fpu/erfc_advsimd.c
@@ -24,8 +24,8 @@ static const struct data
{
uint64x2_t offset, table_scale;
float64x2_t max, shift;
- float64x2_t p20, p40, p41, p42;
- float64x2_t p51, p52;
+ float64x2_t p20, p40, p41, p51;
+ double p42, p52;
double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2];
#if WANT_SIMD_EXCEPT
float64x2_t uflow_bound;
@@ -41,9 +41,9 @@ static const struct data
.p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. */
.p40 = V2 (-0x1.999999999999ap-4), /* 1/10. */
.p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */
- .p42 = V2 (0x1.1111111111111p-3), /* 2/15. */
+ .p42 = 0x1.1111111111111p-3, /* 2/15. */
.p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */
- .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */
+ .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */
/* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */
.qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 },
.qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 },
@@ -157,9 +157,10 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x)
float64x2_t p1 = r;
float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20));
float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20));
- float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42);
+ float64x2_t p42_p52 = vld1q_f64 (&dat->p42);
+ float64x2_t p4 = vfmaq_laneq_f64 (dat->p41, r2, p42_p52, 0);
p4 = vfmsq_f64 (dat->p40, r2, p4);
- float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52);
+ float64x2_t p5 = vfmaq_laneq_f64 (dat->p51, r2, p42_p52, 1);
p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5));
/* Compute p_i using recurrence relation:
p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */

2973
glibc-RHEL-118273-18.patch Normal file

File diff suppressed because it is too large Load Diff

461
glibc-RHEL-118273-19.patch Normal file
View File

@ -0,0 +1,461 @@
commit 13a7ef5999de56add448a24fefb0250236271a06
Author: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Mon Dec 9 15:58:47 2024 +0000
AArch64: Improve codegen in users of ADVSIMD expm1 helper
Add inline helper for expm1 and rearrange operations so MOV
is not necessary in reduction or around the special-case handler.
Reduce memory access by using more indexed MLAs in polynomial.
Speedup on Neoverse V1 for expm1 (19%), sinh (8.5%), and tanh (7.5%).
diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c
index 3db3b80c49292947..f2042db8bcc8466a 100644
--- a/sysdeps/aarch64/fpu/expm1_advsimd.c
+++ b/sysdeps/aarch64/fpu/expm1_advsimd.c
@@ -18,31 +18,18 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f64.h"
+#include "v_expm1_inline.h"
static const struct data
{
- float64x2_t poly[11];
- float64x2_t invln2;
- double ln2[2];
- float64x2_t shift;
- int64x2_t exponent_bias;
+ struct v_expm1_data d;
#if WANT_SIMD_EXCEPT
uint64x2_t thresh, tiny_bound;
#else
float64x2_t oflow_bound;
#endif
} data = {
- /* Generated using fpminimax, with degree=12 in [log(2)/2, log(2)/2]. */
- .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
- V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
- V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
- V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
- V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
- .invln2 = V2 (0x1.71547652b82fep0),
- .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
- .shift = V2 (0x1.8p52),
- .exponent_bias = V2 (0x3ff0000000000000),
+ .d = V_EXPM1_DATA,
#if WANT_SIMD_EXCEPT
/* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs
compare. */
@@ -58,67 +45,36 @@ static const struct data
};
static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+special_case (float64x2_t x, uint64x2_t special, const struct data *d)
{
- return v_call_f64 (expm1, x, y, special);
+ return v_call_f64 (expm1, x, expm1_inline (v_zerofy_f64 (x, special), &d->d),
+ special);
}
/* Double-precision vector exp(x) - 1 function.
- The maximum error observed error is 2.18 ULP:
- _ZGVnN2v_expm1 (0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
- want 0x1.a8b9ea8d66e2p-2. */
+ The maximum observed error is 2.05 ULP:
+ _ZGVnN2v_expm1(0x1.634902eaff3adp-2) got 0x1.a8b636e2a9388p-2
+ want 0x1.a8b636e2a9386p-2. */
float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
- uint64x2_t ix = vreinterpretq_u64_f64 (x);
-
#if WANT_SIMD_EXCEPT
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
/* If fp exceptions are to be triggered correctly, fall back to scalar for
|x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for
shift-left by 1, and compare with thresh which was left-shifted offline -
this is effectively an absolute compare. */
uint64x2_t special
= vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh);
- if (__glibc_unlikely (v_any_u64 (special)))
- x = v_zerofy_f64 (x, special);
#else
/* Large input, NaNs and Infs. */
uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
#endif
- /* Reduce argument to smaller range:
- Let i = round(x / ln2)
- and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
- exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
- where 2^i is exact because i is an integer. */
- float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
- int64x2_t i = vcvtq_s64_f64 (n);
- float64x2_t ln2 = vld1q_f64 (&d->ln2[0]);
- float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0);
- f = vfmsq_laneq_f64 (f, n, ln2, 1);
-
- /* Approximate expm1(f) using polynomial.
- Taylor expansion for expm1(x) has the form:
- x + ax^2 + bx^3 + cx^4 ....
- So we calculate the polynomial P(f) = a + bf + cf^2 + ...
- and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
- float64x2_t f2 = vmulq_f64 (f, f);
- float64x2_t f4 = vmulq_f64 (f2, f2);
- float64x2_t f8 = vmulq_f64 (f4, f4);
- float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
-
- /* Assemble the result.
- expm1(x) ~= 2^i * (p + 1) - 1
- Let t = 2^i. */
- int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias);
- float64x2_t t = vreinterpretq_f64_s64 (u);
-
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (vreinterpretq_f64_u64 (ix),
- vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t),
- special);
+ return special_case (x, special, d);
/* expm1(x) ~= p * t + (t - 1). */
- return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+ return expm1_inline (x, &d->d);
}
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
index 3e3b76c502b01e16..7adf771517de2507 100644
--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
@@ -18,72 +18,31 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f64.h"
+#include "v_expm1_inline.h"
static const struct data
{
- float64x2_t poly[11], inv_ln2;
- double m_ln2[2];
- float64x2_t shift;
+ struct v_expm1_data d;
uint64x2_t halff;
- int64x2_t onef;
#if WANT_SIMD_EXCEPT
uint64x2_t tiny_bound, thresh;
#else
- uint64x2_t large_bound;
+ float64x2_t large_bound;
#endif
} data = {
- /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
- .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
- V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
- V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
- V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
- V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
-
- .inv_ln2 = V2 (0x1.71547652b82fep0),
- .m_ln2 = {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
- .shift = V2 (0x1.8p52),
-
+ .d = V_EXPM1_DATA,
.halff = V2 (0x3fe0000000000000),
- .onef = V2 (0x3ff0000000000000),
#if WANT_SIMD_EXCEPT
/* 2^-26, below which sinh(x) rounds to x. */
.tiny_bound = V2 (0x3e50000000000000),
/* asuint(large_bound) - asuint(tiny_bound). */
.thresh = V2 (0x0230000000000000),
#else
-/* 2^9. expm1 helper overflows for large input. */
- .large_bound = V2 (0x4080000000000000),
+ /* 2^9. expm1 helper overflows for large input. */
+ .large_bound = V2 (0x1p+9),
#endif
};
-static inline float64x2_t
-expm1_inline (float64x2_t x)
-{
- const struct data *d = ptr_barrier (&data);
-
- /* Reduce argument:
- exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
- where i = round(x / ln2)
- and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
- float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
- int64x2_t i = vcvtq_s64_f64 (j);
-
- float64x2_t m_ln2 = vld1q_f64 (d->m_ln2);
- float64x2_t f = vfmaq_laneq_f64 (x, j, m_ln2, 0);
- f = vfmaq_laneq_f64 (f, j, m_ln2, 1);
- /* Approximate expm1(f) using polynomial. */
- float64x2_t f2 = vmulq_f64 (f, f);
- float64x2_t f4 = vmulq_f64 (f2, f2);
- float64x2_t f8 = vmulq_f64 (f4, f4);
- float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
- /* t = 2^i. */
- float64x2_t t = vreinterpretq_f64_u64 (
- vreinterpretq_u64_s64 (vaddq_s64 (vshlq_n_s64 (i, 52), d->onef)));
- /* expm1(x) ~= p * t + (t - 1). */
- return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
-}
-
static float64x2_t NOINLINE VPCS_ATTR
special_case (float64x2_t x)
{
@@ -92,23 +51,23 @@ special_case (float64x2_t x)
/* Approximation for vector double-precision sinh(x) using expm1.
sinh(x) = (exp(x) - exp(-x)) / 2.
- The greatest observed error is 2.57 ULP:
- _ZGVnN2v_sinh (0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
- want 0x1.ab34e59d678d9p-2. */
+ The greatest observed error is 2.52 ULP:
+ _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
+ want -0x1.ac2f05bb66fc9p-2. */
float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
float64x2_t ax = vabsq_f64 (x);
- uint64x2_t sign
- = veorq_u64 (vreinterpretq_u64_f64 (x), vreinterpretq_u64_f64 (ax));
- float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->halff));
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ float64x2_t halfsign = vreinterpretq_f64_u64 (
+ vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
#if WANT_SIMD_EXCEPT
uint64x2_t special = vcgeq_u64 (
vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
#else
- uint64x2_t special = vcgeq_u64 (vreinterpretq_u64_f64 (ax), d->large_bound);
+ uint64x2_t special = vcageq_f64 (x, d->large_bound);
#endif
/* Fall back to scalar variant for all lanes if any of them are special. */
@@ -118,7 +77,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
/* Up to the point that expm1 overflows, we can use it to calculate sinh
using a slight rearrangement of the definition of sinh. This allows us to
retain acceptable accuracy for very small inputs. */
- float64x2_t t = expm1_inline (ax);
+ float64x2_t t = expm1_inline (ax, &d->d);
t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
return vmulq_f64 (t, halfsign);
}
diff --git a/sysdeps/aarch64/fpu/tanh_advsimd.c b/sysdeps/aarch64/fpu/tanh_advsimd.c
index 1da1dfa5dbe418b6..402ba9d8ad2478a8 100644
--- a/sysdeps/aarch64/fpu/tanh_advsimd.c
+++ b/sysdeps/aarch64/fpu/tanh_advsimd.c
@@ -18,68 +18,30 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f64.h"
+#include "v_expm1_inline.h"
static const struct data
{
- float64x2_t poly[11];
- float64x2_t inv_ln2, ln2_hi, ln2_lo, shift;
- uint64x2_t onef;
+ struct v_expm1_data d;
uint64x2_t thresh, tiny_bound;
} data = {
- /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
- .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
- V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
- V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
- V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
- V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
-
- .inv_ln2 = V2 (0x1.71547652b82fep0),
- .ln2_hi = V2 (-0x1.62e42fefa39efp-1),
- .ln2_lo = V2 (-0x1.abc9e3b39803fp-56),
- .shift = V2 (0x1.8p52),
-
- .onef = V2 (0x3ff0000000000000),
+ .d = V_EXPM1_DATA,
.tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27). */
/* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
.thresh = V2 (0x01f241bf835f9d5f),
};
-static inline float64x2_t
-expm1_inline (float64x2_t x, const struct data *d)
-{
- /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
- the scalar variant of tanh. */
-
- /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
- float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
- int64x2_t i = vcvtq_s64_f64 (j);
- float64x2_t f = vfmaq_f64 (x, j, d->ln2_hi);
- f = vfmaq_f64 (f, j, d->ln2_lo);
-
- /* Approximate expm1(f) using polynomial. */
- float64x2_t f2 = vmulq_f64 (f, f);
- float64x2_t f4 = vmulq_f64 (f2, f2);
- float64x2_t p = vfmaq_f64 (
- f, f2, v_estrin_10_f64 (f, f2, f4, vmulq_f64 (f4, f4), d->poly));
-
- /* t = 2 ^ i. */
- float64x2_t t = vreinterpretq_f64_u64 (
- vaddq_u64 (vreinterpretq_u64_s64 (i << 52), d->onef));
- /* expm1(x) = p * t + (t - 1). */
- return vfmaq_f64 (vsubq_f64 (t, v_f64 (1)), p, t);
-}
-
static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+special_case (float64x2_t x, float64x2_t q, float64x2_t qp2,
+ uint64x2_t special)
{
- return v_call_f64 (tanh, x, y, special);
+ return v_call_f64 (tanh, x, vdivq_f64 (q, qp2), special);
}
/* Vector approximation for double-precision tanh(x), using a simplified
- version of expm1. The greatest observed error is 2.77 ULP:
- _ZGVnN2v_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
- want -0x1.bd6a21a163624p-3. */
+ version of expm1. The greatest observed error is 2.70 ULP:
+ _ZGVnN2v_tanh(-0x1.c59aa220cb177p-3) got -0x1.be5452a6459fep-3
+ want -0x1.be5452a6459fbp-3. */
float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
@@ -100,10 +62,10 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
u = vaddq_f64 (u, u);
/* tanh(x) = (e^2x - 1) / (e^2x + 1). */
- float64x2_t q = expm1_inline (u, d);
- float64x2_t qp2 = vaddq_f64 (q, v_f64 (2));
+ float64x2_t q = expm1_inline (u, &d->d);
+ float64x2_t qp2 = vaddq_f64 (q, v_f64 (2.0));
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x, vdivq_f64 (q, qp2), special);
+ return special_case (x, q, qp2, special);
return vdivq_f64 (q, qp2);
}
diff --git a/sysdeps/aarch64/fpu/v_expm1_inline.h b/sysdeps/aarch64/fpu/v_expm1_inline.h
new file mode 100644
index 0000000000000000..a925183d4e5e4623
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_expm1_inline.h
@@ -0,0 +1,97 @@
+/* Double-precision inline helper for vector (Advanced SIMD) expm1 function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_V_EXPM1_INLINE_H
+#define AARCH64_FPU_V_EXPM1_INLINE_H
+
+#include "v_math.h"
+
+struct v_expm1_data
+{
+ float64x2_t c2, c4, c6, c8;
+ float64x2_t invln2;
+ int64x2_t exponent_bias;
+ double c1, c3, c5, c7, c9, c10;
+ double ln2[2];
+};
+
+/* Generated using fpminimax, with degree=12 in [-log(2)/2, log(2)/2]. */
+#define V_EXPM1_DATA \
+ { \
+ .c1 = 0x1.5555555555559p-3, .c2 = V2 (0x1.555555555554bp-5), \
+ .c3 = 0x1.111111110f663p-7, .c4 = V2 (0x1.6c16c16c1b5f3p-10), \
+ .c5 = 0x1.a01a01affa35dp-13, .c6 = V2 (0x1.a01a018b4ecbbp-16), \
+ .c7 = 0x1.71ddf82db5bb4p-19, .c8 = V2 (0x1.27e517fc0d54bp-22), \
+ .c9 = 0x1.af5eedae67435p-26, .c10 = 0x1.1f143d060a28ap-29, \
+ .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, \
+ .invln2 = V2 (0x1.71547652b82fep0), \
+ .exponent_bias = V2 (0x3ff0000000000000), \
+ }
+
+static inline float64x2_t
+expm1_inline (float64x2_t x, const struct v_expm1_data *d)
+{
+ /* Helper routine for calculating exp(x) - 1. */
+
+ float64x2_t ln2 = vld1q_f64 (&d->ln2[0]);
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ float64x2_t n = vrndaq_f64 (vmulq_f64 (x, d->invln2));
+ int64x2_t i = vcvtq_s64_f64 (n);
+ float64x2_t f = vfmsq_laneq_f64 (x, n, ln2, 0);
+ f = vfmsq_laneq_f64 (f, n, ln2, 1);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t lane_consts_13 = vld1q_f64 (&d->c1);
+ float64x2_t lane_consts_57 = vld1q_f64 (&d->c5);
+ float64x2_t lane_consts_910 = vld1q_f64 (&d->c9);
+ float64x2_t p01 = vfmaq_laneq_f64 (v_f64 (0.5), f, lane_consts_13, 0);
+ float64x2_t p23 = vfmaq_laneq_f64 (d->c2, f, lane_consts_13, 1);
+ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, f, lane_consts_57, 0);
+ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, f, lane_consts_57, 1);
+ float64x2_t p03 = vfmaq_f64 (p01, f2, p23);
+ float64x2_t p47 = vfmaq_f64 (p45, f2, p67);
+ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, f, lane_consts_910, 0);
+ float64x2_t p = vfmaq_laneq_f64 (p89, f2, lane_consts_910, 1);
+ p = vfmaq_f64 (p47, f4, p);
+ p = vfmaq_f64 (p03, f4, p);
+
+ p = vfmaq_f64 (f, f2, p);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias);
+ float64x2_t t = vreinterpretq_f64_s64 (u);
+
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h
index 1daedfdd51cfc54b..c1fb88b5e027b322 100644
--- a/sysdeps/aarch64/fpu/v_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h
@@ -21,7 +21,6 @@
#define AARCH64_FPU_V_EXPM1F_INLINE_H
#include "v_math.h"
-#include "math_config.h"
struct v_expm1f_data
{

862
glibc-RHEL-118273-2.patch Normal file
View File

@ -0,0 +1,862 @@
commit bdb5705b7bab618ed4445f4b17d4b1e4fbbf94a7
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Feb 20 16:59:39 2024 +0000
aarch64/fpu: Add vector variants of cosh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 320b6ed43a9a454c..019c3a51880e2306 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -3,6 +3,7 @@ libmvec-supported-funcs = acos \
atan \
atan2 \
cos \
+ cosh \
erf \
exp \
exp10 \
@@ -32,7 +33,8 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
erf_data \
erff_data \
sv_erf_data \
- sv_erff_data
+ sv_erff_data \
+ v_exp_tail_data
endif
sve-cflags = -march=armv8-a+sve
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index d7b1e87191b66439..884b4b57f097635f 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -79,6 +79,11 @@ libmvec {
_ZGVsMxv_tan;
}
GLIBC_2.40 {
+ _ZGVnN2v_cosh;
+ _ZGVnN2v_coshf;
+ _ZGVnN4v_coshf;
+ _ZGVsMxv_cosh;
+ _ZGVsMxv_coshf;
_ZGVnN2v_erf;
_ZGVnN2v_erff;
_ZGVnN4v_erff;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index d8d88de2181569f9..c63b2948d4938b0d 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -21,6 +21,7 @@ libmvec_hidden_proto (V_NAME_F1(acos));
libmvec_hidden_proto (V_NAME_F1(asin));
libmvec_hidden_proto (V_NAME_F1(atan));
libmvec_hidden_proto (V_NAME_F1(cos));
+libmvec_hidden_proto (V_NAME_F1(cosh));
libmvec_hidden_proto (V_NAME_F1(erf));
libmvec_hidden_proto (V_NAME_F1(exp10));
libmvec_hidden_proto (V_NAME_F1(exp2));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 71f53363a071126d..8ca55098706a54c2 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -49,6 +49,10 @@
# define __DECL_SIMD_cos __DECL_SIMD_aarch64
# undef __DECL_SIMD_cosf
# define __DECL_SIMD_cosf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cosh
+# define __DECL_SIMD_cosh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_coshf
+# define __DECL_SIMD_coshf __DECL_SIMD_aarch64
# undef __DECL_SIMD_erf
# define __DECL_SIMD_erf __DECL_SIMD_aarch64
# undef __DECL_SIMD_erff
@@ -124,6 +128,7 @@ __vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t);
@@ -141,6 +146,7 @@ __vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t);
@@ -163,6 +169,7 @@ __sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_exp10f (__sv_f32_t, __sv_bool_t);
@@ -180,6 +187,7 @@ __sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_exp10 (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cosh_advsimd.c b/sysdeps/aarch64/fpu/cosh_advsimd.c
new file mode 100644
index 0000000000000000..ec7b59637e973da9
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cosh_advsimd.c
@@ -0,0 +1,108 @@
+/* Double-precision vector (AdvSIMD) cosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+
+static const struct data
+{
+ float64x2_t poly[3];
+ float64x2_t inv_ln2, ln2, shift, thres;
+ uint64x2_t index_mask, special_bound;
+} data = {
+ .poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
+ V2 (0x1.5555576a59599p-5), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep8), /* N/ln2. */
+ /* -ln2/N. */
+ .ln2 = {-0x1.62e42fefa39efp-9, -0x1.abc9e3b39803f3p-64},
+ .shift = V2 (0x1.8p+52),
+ .thres = V2 (704.0),
+
+ .index_mask = V2 (0xff),
+ /* 0x1.6p9, above which exp overflows. */
+ .special_bound = V2 (0x4086000000000000),
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (cosh, x, y, special);
+}
+
+/* Helper for approximating exp(x). Copied from v_exp_tail, with no
+ special-case handling or tail. */
+static inline float64x2_t
+exp_inline (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* n = round(x/(ln2/N)), with N = 2^V_EXP_TAIL_TABLE_BITS. */
+ float64x2_t z = vfmaq_f64 (d->shift, x, d->inv_ln2);
+ uint64x2_t u = vreinterpretq_u64_f64 (z);
+ float64x2_t n = vsubq_f64 (z, d->shift);
+
+ /* r = x - n*ln2/N; d->ln2 holds -ln2/N split across its two lanes. */
+ float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0);
+ r = vfmaq_laneq_f64 (r, n, d->ln2, 1);
+
+ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
+ uint64x2_t i = vandq_u64 (u, d->index_mask);
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4, with Ck = poly[k]. */
+ float64x2_t y = vfmaq_f64 (d->poly[1], d->poly[2], r);
+ y = vfmaq_f64 (d->poly[0], y, r);
+ y = vmulq_f64 (vfmaq_f64 (v_f64 (1), y, r), r);
+
+ /* s = 2^(n/N). */
+ u = v_lookup_u64 (__v_exp_tail_data, i);
+ float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
+
+ return vfmaq_f64 (s, y, s);
+}
+
+/* Approximation for vector double-precision cosh(x) using exp_inline.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the scalar fall-back region, so is the
+ same as the scalar routine, 1.93 ULP:
+ _ZGVnN2v_cosh (0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021
+ want 0x1.fdf28623ef923p+1021.
+
+ The greatest observed error in the non-special region is 1.54 ULP:
+ _ZGVnN2v_cosh (0x1.8e205b6ecacf7p+2) got 0x1.f711dcb0c77afp+7
+ want 0x1.f711dcb0c77b1p+7. */
+float64x2_t VPCS_ATTR V_NAME_D1 (cosh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t special
+ = vcgtq_u64 (vreinterpretq_u64_f64 (ax), d->special_bound);
+
+ /* Up to the point that exp overflows, we can use it to calculate cosh by
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ float64x2_t t = exp_inline (ax);
+ float64x2_t half_t = vmulq_n_f64 (t, 0.5);
+ float64x2_t half_over_t = vdivq_f64 (v_f64 (0.5), t);
+
+ /* Fall back to scalar for any special cases. */
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, vaddq_f64 (half_t, half_over_t), special);
+
+ return vaddq_f64 (half_t, half_over_t);
+}
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
new file mode 100644
index 0000000000000000..919f34604a452b4a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -0,0 +1,105 @@
+/* Double-precision vector (SVE) cosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+
+static const struct data
+{
+ float64_t poly[3];
+ float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
+ uint64_t index_mask, special_bound;
+} data = {
+ .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
+ 0x1.5555576a59599p-5, },
+
+ .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */
+ /* -ln2/N. */
+ .ln2_hi = -0x1.62e42fefa39efp-9,
+ .ln2_lo = -0x1.abc9e3b39803f3p-64,
+ .shift = 0x1.8p+52,
+ .thres = 704.0,
+
+ .index_mask = 0xff,
+ /* 0x1.6p9, above which exp overflows. */
+ .special_bound = 0x4086000000000000,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (cosh, x, y, special);
+}
+
+/* Helper for approximating exp(x). Copied from sv_exp_tail, with no
+ special-case handling or tail. */
+static inline svfloat64_t
+exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+{
+ /* n = round(x/(ln2/N)). */
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+
+ svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi); /* r = x - n*ln2/N. */
+ r = svmla_x (pg, r, n, d->ln2_lo);
+
+ svuint64_t u = svreinterpret_u64 (z);
+ svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
+ svuint64_t i = svand_x (pg, u, d->index_mask);
+
+ svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
+ y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
+ y = svmla_x (pg, sv_f64 (1.0), r, y);
+ y = svmul_x (pg, r, y); /* y ~= exp(r) - 1. */
+
+ /* s = 2^(n/N). */
+ u = svld1_gather_index (pg, __v_exp_tail_data, i);
+ svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e));
+
+ return svmla_x (pg, s, s, y);
+}
+
+/* Approximation for SVE double-precision cosh(x) using exp_inline.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the scalar fall-back region, so is the
+ same as the scalar routine, 1.93 ULP:
+ _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021
+ want 0x1.fd774e958236fp+1021.
+
+ The greatest observed error in the non-special region is 1.54 ULP:
+ _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8
+ want 0x1.f5e2bb8d5c991p+8. */
+svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound);
+
+ /* Up to the point that exp overflows, we can use it to calculate cosh by
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ svfloat64_t t = exp_inline (ax, pg, d);
+ svfloat64_t half_t = svmul_x (pg, t, 0.5);
+ svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
+
+ /* Fall back to scalar for any special cases. */
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+
+ return svadd_x (pg, half_t, half_over_t);
+}
diff --git a/sysdeps/aarch64/fpu/coshf_advsimd.c b/sysdeps/aarch64/fpu/coshf_advsimd.c
new file mode 100644
index 0000000000000000..c1ab4923b826569b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/coshf_advsimd.c
@@ -0,0 +1,84 @@
+/* Single-precision vector (AdvSIMD) cosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_expf_inline.h"
+#include "v_math.h"
+
+static const struct data
+{
+ struct v_expf_data expf_consts;
+ uint32x4_t tiny_bound, special_bound;
+} data = {
+ .expf_consts = V_EXPF_DATA,
+ .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */
+ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
+ .special_bound = V4 (0x42ad496c),
+};
+
+#if !WANT_SIMD_EXCEPT
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (coshf, x, y, special);
+}
+#endif
+
+/* Single-precision vector cosh, using vector expf.
+ Maximum error is 2.38 ULP:
+ _ZGVnN4v_coshf (0x1.e8001ep+1) got 0x1.6a491ep+4
+ want 0x1.6a4922p+4. */
+float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to the scalar
+ variant for all inputs if any input is a special value or above the bound
+ at which expf overflows. */
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return v_call_f32 (coshf, x, x, v_u32 (-1));
+
+ uint32x4_t tiny = vcleq_u32 (iax, d->tiny_bound);
+ /* If any input is tiny, avoid underflow exception by fixing tiny lanes of
+ input to 0, which will generate no exceptions. */
+ if (__glibc_unlikely (v_any_u32 (tiny)))
+ ax = v_zerofy_f32 (ax, tiny);
+#endif
+
+ /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
+ float32x4_t t = v_expf_inline (ax, &d->expf_consts);
+ float32x4_t half_t = vmulq_n_f32 (t, 0.5);
+ float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t);
+
+#if WANT_SIMD_EXCEPT
+ if (__glibc_unlikely (v_any_u32 (tiny)))
+ return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t));
+#else
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return special_case (x, vaddq_f32 (half_t, half_over_t), special);
+#endif
+
+ return vaddq_f32 (half_t, half_over_t);
+}
+libmvec_hidden_def (V_NAME_F1 (cosh))
+HALF_WIDTH_ALIAS_F1 (cosh)
diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c
new file mode 100644
index 0000000000000000..e5d8a299c6aa7ceb
--- /dev/null
+++ b/sysdeps/aarch64/fpu/coshf_sve.c
@@ -0,0 +1,59 @@
+/* Single-precision vector (SVE) cosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "sv_expf_inline.h"
+
+static const struct data
+{
+ struct sv_expf_data expf_consts;
+ uint32_t special_bound;
+} data = {
+ .expf_consts = SV_EXPF_DATA,
+ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
+ .special_bound = 0x42ad496c,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+{
+ return sv_call_f32 (coshf, x, y, pg);
+}
+
+/* Single-precision vector cosh, using vector expf.
+ Maximum error is 1.89 ULP:
+ _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127
+ want 0x1.f00adcp+127. */
+svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound);
+
+ /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
+ svfloat32_t t = expf_inline (ax, pg, &d->expf_consts);
+ svfloat32_t half_t = svmul_x (pg, t, 0.5);
+ svfloat32_t half_over_t = svdivr_x (pg, t, 0.5);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+
+ return svadd_x (pg, half_t, half_over_t);
+}
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
new file mode 100644
index 0000000000000000..23963b5f8ec89ead
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -0,0 +1,75 @@
+/* SVE helper for single-precision routines which depend on exp
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_SV_EXPF_INLINE_H
+#define AARCH64_FPU_SV_EXPF_INLINE_H
+
+#include "sv_math.h"
+
+struct sv_expf_data
+{
+ float poly[5];
+ float inv_ln2, ln2_hi, ln2_lo, shift;
+};
+
+/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
+ compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */
+#define SV_EXPF_DATA \
+ { \
+ .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \
+ 0x1.0e4020p-7f }, \
+ \
+ .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
+ .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \
+ }
+
+#define C(i) sv_f32 (d->poly[i])
+
+static inline svfloat32_t
+expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
+{
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+
+ /* Load poly[4], inv_ln2, ln2_hi, ln2_lo as one quad-word to minimise memory access. */
+ svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]);
+
+ /* n = round(x/(ln2/N)). */
+ svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1);
+ svfloat32_t n = svsub_x (pg, z, d->shift);
+
+ /* r = x - n*ln2/N. */
+ svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2);
+ r = svmls_lane (r, n, c4_invln2_and_ln2, 3);
+
+ /* scale = 2^(n/N). */
+ svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z));
+
+ /* poly = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. */
+ svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
+ svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0);
+ svfloat32_t r2 = svmul_f32_x (pg, r, r);
+ svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+ svfloat32_t p0 = svmul_f32_x (pg, r, C (0));
+ svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+ return svmla_x (pg, scale, scale, poly);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 41fdb92d7ea6e707..b37cb7d5e9c0d96a 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
+VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
VPCS_VECTOR_WRAPPER (erf_advsimd, _ZGVnN2v_erf)
VPCS_VECTOR_WRAPPER (exp_advsimd, _ZGVnN2v_exp)
VPCS_VECTOR_WRAPPER (exp10_advsimd, _ZGVnN2v_exp10)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 8e3d64da420348a7..011f07d2c15b148f 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
+SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
SVE_VECTOR_WRAPPER (erf_sve, _ZGVsMxv_erf)
SVE_VECTOR_WRAPPER (exp_sve, _ZGVsMxv_exp)
SVE_VECTOR_WRAPPER (exp10_sve, _ZGVsMxv_exp10)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 33ae92878f774ac3..35452991431e238a 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
+VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
VPCS_VECTOR_WRAPPER (erff_advsimd, _ZGVnN4v_erff)
VPCS_VECTOR_WRAPPER (expf_advsimd, _ZGVnN4v_expf)
VPCS_VECTOR_WRAPPER (exp10f_advsimd, _ZGVnN4v_exp10f)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index ac0464f196e7972f..bbc74ede88c9e6c8 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
+SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
SVE_VECTOR_WRAPPER (erff_sve, _ZGVsMxv_erff)
SVE_VECTOR_WRAPPER (expf_sve, _ZGVsMxv_expf)
SVE_VECTOR_WRAPPER (exp10f_sve, _ZGVsMxv_exp10f)
diff --git a/sysdeps/aarch64/fpu/v_exp_tail_data.c b/sysdeps/aarch64/fpu/v_exp_tail_data.c
new file mode 100644
index 0000000000000000..151e97c21bbc11ae
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_exp_tail_data.c
@@ -0,0 +1,110 @@
+/* Lookup table for high-precision exp(x, tail) function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "vecmath_config.h"
+
+/* asuint64 (2^(j/N)) - (j << 44), j=0..N-1, N=2^8=256. */
+const uint64_t __v_exp_tail_data[] = {
+ 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335,
+ 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc,
+ 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574,
+ 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836,
+ 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383,
+ 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85,
+ 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2,
+ 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e,
+ 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc,
+ 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e,
+ 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b,
+ 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f,
+ 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4,
+ 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027,
+ 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6,
+ 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1,
+ 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f,
+ 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29,
+ 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1,
+ 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f,
+ 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56,
+ 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd,
+ 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff,
+ 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b,
+ 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866,
+ 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4,
+ 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422,
+ 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024,
+ 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897,
+ 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232,
+ 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0,
+ 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7,
+ 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d,
+ 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee,
+ 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82,
+ 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2,
+ 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd,
+ 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03,
+ 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148,
+ 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4,
+ 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320,
+ 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6,
+ 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd,
+ 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645,
+ 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484,
+ 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a,
+ 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9,
+ 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6,
+ 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132,
+ 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491,
+ 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13,
+ 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21,
+ 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699,
+ 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778,
+ 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736,
+ 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2,
+ 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f,
+ 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2,
+ 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090,
+ 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e,
+ 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33,
+ 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052,
+ 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf,
+ 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774,
+ 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666,
+ 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1,
+ 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47,
+ 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f,
+ 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09,
+ 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c,
+ 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b,
+ 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db,
+ 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa,
+ 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968,
+ 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487,
+ 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075,
+ 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460,
+ 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17,
+ 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6,
+ 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740,
+ 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1,
+ 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a,
+ 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540,
+ 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89,
+ 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1,
+ 0x3feff9d96b2a23d9,
+};
diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h
new file mode 100644
index 0000000000000000..a3b0e32f9eb42021
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_expf_inline.h
@@ -0,0 +1,71 @@
+/* Helper for single-precision AdvSIMD routines which depend on exp
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_V_EXPF_INLINE_H
+#define AARCH64_FPU_V_EXPF_INLINE_H
+
+#include "v_math.h"
+
+struct v_expf_data
+{
+ float32x4_t poly[5];
+ float32x4_t shift, invln2_and_ln2;
+};
+
+/* maxerr: 1.45358 +0.5 ulp. */
+#define V_EXPF_DATA \
+ { \
+ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \
+ V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \
+ .shift = V4 (0x1.8p23f), \
+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ }
+
+#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */
+#define C(i) d->poly[i]
+
+static inline float32x4_t
+v_expf_inline (float32x4_t x, const struct v_expf_data *d)
+{
+ /* Helper routine for calculating exp(x).
+ Copied from v_expf.c, with all special-case handling removed - the
+ calling routine should handle special values if required. */
+
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+ float32x4_t n, r, z;
+ z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0);
+ n = vsubq_f32 (z, d->shift);
+ r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1);
+ r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+
+ /* Custom order-4 Estrin avoids building high order monomial. */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t p, q, poly;
+ p = vfmaq_f32 (C (1), C (0), r);
+ q = vfmaq_f32 (C (3), C (2), r);
+ q = vfmaq_f32 (q, p, r2);
+ p = vmulq_f32 (C (4), r);
+ poly = vfmaq_f32 (p, q, r2);
+ return vfmaq_f32 (scale, poly, scale);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/vecmath_config.h b/sysdeps/aarch64/fpu/vecmath_config.h
index 409c0c9bd9b85422..3f0b5f476433ca06 100644
--- a/sysdeps/aarch64/fpu/vecmath_config.h
+++ b/sysdeps/aarch64/fpu/vecmath_config.h
@@ -59,6 +59,8 @@ extern const struct v_log_data
} table[1 << V_LOG_TABLE_BITS];
} __v_log_data attribute_hidden;
+#define V_EXP_TAIL_TABLE_BITS 8
+extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] attribute_hidden;
#define V_EXP_TABLE_BITS 7
extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] attribute_hidden;
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index f1103a245645476b..48d747ad5793be96 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -701,11 +701,19 @@ double: 2
float: 2
ldouble: 2
+Function: "cosh_advsimd":
+double: 2
+float: 2
+
Function: "cosh_downward":
double: 3
float: 1
ldouble: 3
+Function: "cosh_sve":
+double: 2
+float: 2
+
Function: "cosh_towardzero":
double: 3
float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index 6193518fb001cc92..f66da42c3630bf48 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -73,8 +73,13 @@ GLIBC_2.39 _ZGVsMxv_tan F
GLIBC_2.39 _ZGVsMxv_tanf F
GLIBC_2.39 _ZGVsMxvv_atan2 F
GLIBC_2.39 _ZGVsMxvv_atan2f F
+GLIBC_2.40 _ZGVnN2v_cosh F
+GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
+GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
+GLIBC_2.40 _ZGVsMxv_cosh F
+GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F
GLIBC_2.40 _ZGVsMxv_erff F

359
glibc-RHEL-118273-20.patch Normal file
View File

@ -0,0 +1,359 @@
commit ca0c0d0f26fbf75b9cacc65122b457e8fdec40b8
Author: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Mon Dec 9 15:55:39 2024 +0000
AArch64: Improve codegen in users of ADVSIMD log1p helper
Add inline helper for log1p and rearrange operations so MOV
is not necessary in reduction or around the special-case handler.
Reduce memory access by using more indexed MLAs in polynomial.
Speedup on Neoverse V1 for log1p (3.5%), acosh (7.5%) and atanh (10%).
Conflicts:
sysdeps/aarch64/fpu/log1p_advsimd.c
(Fixup context to apply without out-of-scope dependency 751a5502)
diff --git a/sysdeps/aarch64/fpu/acosh_advsimd.c b/sysdeps/aarch64/fpu/acosh_advsimd.c
index c88283cf1191f4eb..a98f4a2e4d8cbf42 100644
--- a/sysdeps/aarch64/fpu/acosh_advsimd.c
+++ b/sysdeps/aarch64/fpu/acosh_advsimd.c
@@ -54,9 +54,8 @@ VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x)
x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x);
#endif
- float64x2_t xm1 = vsubq_f64 (x, v_f64 (1));
- float64x2_t y;
- y = vaddq_f64 (x, v_f64 (1));
+ float64x2_t xm1 = vsubq_f64 (x, v_f64 (1.0));
+ float64x2_t y = vaddq_f64 (x, v_f64 (1.0));
y = vmulq_f64 (y, xm1);
y = vsqrtq_f64 (y);
y = vaddq_f64 (xm1, y);
diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c
index 3c3d0bd6ad41396d..eb9769aeac29cf15 100644
--- a/sysdeps/aarch64/fpu/atanh_advsimd.c
+++ b/sysdeps/aarch64/fpu/atanh_advsimd.c
@@ -23,15 +23,19 @@
const static struct data
{
struct v_log1p_data log1p_consts;
- uint64x2_t one, half;
+ uint64x2_t one;
+ uint64x2_t sign_mask;
} data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
.one = V2 (0x3ff0000000000000),
- .half = V2 (0x3fe0000000000000) };
+ .sign_mask = V2 (0x8000000000000000) };
static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+special_case (float64x2_t x, float64x2_t halfsign, float64x2_t y,
+ uint64x2_t special, const struct data *d)
{
- return v_call_f64 (atanh, x, y, special);
+ y = log1p_inline (y, &d->log1p_consts);
+ return v_call_f64 (atanh, vbslq_f64 (d->sign_mask, halfsign, x),
+ vmulq_f64 (halfsign, y), special);
}
/* Approximation for vector double-precision atanh(x) using modified log1p.
@@ -43,11 +47,10 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
+ float64x2_t halfsign = vbslq_f64 (d->sign_mask, x, v_f64 (0.5));
float64x2_t ax = vabsq_f64 (x);
uint64x2_t ia = vreinterpretq_u64_f64 (ax);
- uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
uint64x2_t special = vcgeq_u64 (ia, d->one);
- float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
#if WANT_SIMD_EXCEPT
ax = v_zerofy_f64 (ax, special);
@@ -55,10 +58,15 @@ float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
float64x2_t y;
y = vaddq_f64 (ax, ax);
- y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
- y = log1p_inline (y, &d->log1p_consts);
+ y = vdivq_f64 (y, vsubq_f64 (vreinterpretq_f64_u64 (d->one), ax));
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x, vmulq_f64 (y, halfsign), special);
+#if WANT_SIMD_EXCEPT
+ return special_case (x, halfsign, y, special, d);
+#else
+ return special_case (ax, halfsign, y, special, d);
+#endif
+
+ y = log1p_inline (y, &d->log1p_consts);
return vmulq_f64 (y, halfsign);
}
diff --git a/sysdeps/aarch64/fpu/log1p_advsimd.c b/sysdeps/aarch64/fpu/log1p_advsimd.c
index ffc418fc9c24be28..9d18578ce6497787 100644
--- a/sysdeps/aarch64/fpu/log1p_advsimd.c
+++ b/sysdeps/aarch64/fpu/log1p_advsimd.c
@@ -17,43 +17,26 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "v_math.h"
-#include "poly_advsimd_f64.h"
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
const static struct data
{
- float64x2_t poly[19], ln2[2];
- uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one;
- int64x2_t one_top;
-} data = {
- /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */
- .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2),
- V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3),
- V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3),
- V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4),
- V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4),
- V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4),
- V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4),
- V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5),
- V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4),
- V2 (-0x1.cfa7385bdb37ep-6) },
- .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) },
- /* top32(asuint64(sqrt(2)/2)) << 32. */
- .hf_rt2_top = V2 (0x3fe6a09e00000000),
- /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */
- .one_m_hf_rt2_top = V2 (0x00095f6200000000),
- .umask = V2 (0x000fffff00000000),
- .one_top = V2 (0x3ff),
- .inf = V2 (0x7ff0000000000000),
- .minus_one = V2 (0xbff0000000000000)
-};
+ struct v_log1p_data d;
+ uint64x2_t inf, minus_one;
+} data = { .d = V_LOG1P_CONSTANTS_TABLE,
+ .inf = V2 (0x7ff0000000000000),
+ .minus_one = V2 (0xbff0000000000000) };
#define BottomMask v_u64 (0xffffffff)
-static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, uint64x2_t cmp, const struct data *d)
{
- return v_call_f64 (log1p, x, y, special);
+ /* Side-step special lanes so fenv exceptions are not triggered
+ inadvertently. */
+ float64x2_t x_nospecial = v_zerofy_f64 (x, cmp);
+ return v_call_f64 (log1p, x, log1p_inline (x_nospecial, &d->d), cmp);
}
/* Vector log1p approximation using polynomial on reduced interval. Routine is
@@ -66,64 +49,12 @@ VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x)
const struct data *d = ptr_barrier (&data);
uint64x2_t ix = vreinterpretq_u64_f64 (x);
uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
- uint64x2_t special = vcgeq_u64 (ia, d->inf);
-#if WANT_SIMD_EXCEPT
- special = vorrq_u64 (special,
- vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1))));
- if (__glibc_unlikely (v_any_u64 (special)))
- x = v_zerofy_f64 (x, special);
-#else
- special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1)));
-#endif
+ uint64x2_t special_cases
+ = vorrq_u64 (vcgeq_u64 (ia, d->inf), vcgeq_u64 (ix, d->minus_one));
- /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
- is in [sqrt(2)/2, sqrt(2)]):
- log1p(x) = k*log(2) + log1p(f).
+ if (__glibc_unlikely (v_any_u64 (special_cases)))
+ return special_case (x, special_cases, d);
- f may not be representable exactly, so we need a correction term:
- let m = round(1 + x), c = (1 + x) - m.
- c << m: at very small x, log1p(x) ~ x, hence:
- log(1+x) - log(m) ~ c/m.
-
- We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
-
- /* Obtain correctly scaled k by manipulation in the exponent.
- The scalar algorithm casts down to 32-bit at this point to calculate k and
- u_red. We stay in double-width to obtain f and k, using the same constants
- as the scalar algorithm but shifted left by 32. */
- float64x2_t m = vaddq_f64 (x, v_f64 (1));
- uint64x2_t mi = vreinterpretq_u64_f64 (m);
- uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
-
- int64x2_t ki
- = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
- float64x2_t k = vcvtq_f64_s64 (ki);
-
- /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
- uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
- uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
- float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
-
- /* Correction term c/m. */
- float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
-
- /* Approximate log1p(x) on the reduced input using a polynomial. Because
- log1p(0)=0 we choose an approximation of the form:
- x + C0*x^2 + C1*x^3 + C2x^4 + ...
- Hence approximation has the form f + f^2 * P(f)
- where P(x) = C0 + C1*x + C2x^2 + ...
- Assembling this all correctly is dealt with at the final step. */
- float64x2_t f2 = vmulq_f64 (f, f);
- float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
-
- float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
- float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
- float64x2_t y = vaddq_f64 (ylo, yhi);
-
- if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p),
- special);
-
- return vfmaq_f64 (y, f2, p);
+ return log1p_inline (x, &d->d);
}
diff --git a/sysdeps/aarch64/fpu/v_log1p_inline.h b/sysdeps/aarch64/fpu/v_log1p_inline.h
index 242e43b6eecc0b6e..834ff65adf34ed4a 100644
--- a/sysdeps/aarch64/fpu/v_log1p_inline.h
+++ b/sysdeps/aarch64/fpu/v_log1p_inline.h
@@ -21,29 +21,30 @@
#define AARCH64_FPU_V_LOG1P_INLINE_H
#include "v_math.h"
-#include "poly_advsimd_f64.h"
struct v_log1p_data
{
- float64x2_t poly[19], ln2[2];
+ float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16;
uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask;
int64x2_t one_top;
+ double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
+ double ln2[2];
};
/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */
#define V_LOG1P_CONSTANTS_TABLE \
{ \
- .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \
- V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \
- V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \
- V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \
- V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \
- V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \
- V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \
- V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \
- V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \
- V2 (-0x1.cfa7385bdb37ep-6) }, \
- .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \
+ .c0 = V2 (-0x1.ffffffffffffbp-2), .c1 = 0x1.55555555551a9p-2, \
+ .c2 = V2 (-0x1.00000000008e3p-2), .c3 = 0x1.9999999a32797p-3, \
+ .c4 = V2 (-0x1.555555552fecfp-3), .c5 = 0x1.249248e071e5ap-3, \
+ .c6 = V2 (-0x1.ffffff8bf8482p-4), .c7 = 0x1.c71c8f07da57ap-4, \
+ .c8 = V2 (-0x1.9999ca4ccb617p-4), .c9 = 0x1.7459ad2e1dfa3p-4, \
+ .c10 = V2 (-0x1.554d2680a3ff2p-4), .c11 = 0x1.3b4c54d487455p-4, \
+ .c12 = V2 (-0x1.2548a9ffe80e6p-4), .c13 = 0x1.0f389a24b2e07p-4, \
+ .c14 = V2 (-0x1.eee4db15db335p-5), .c15 = 0x1.e95b494d4a5ddp-5, \
+ .c16 = V2 (-0x1.15fdf07cb7c73p-4), .c17 = 0x1.0310b70800fcfp-4, \
+ .c18 = -0x1.cfa7385bdb37ep-6, \
+ .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, \
.hf_rt2_top = V2 (0x3fe6a09e00000000), \
.one_m_hf_rt2_top = V2 (0x00095f6200000000), \
.umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \
@@ -51,19 +52,45 @@ struct v_log1p_data
#define BottomMask v_u64 (0xffffffff)
+static inline float64x2_t
+eval_poly (float64x2_t m, float64x2_t m2, const struct v_log1p_data *d)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner. */
+ float64x2_t c13 = vld1q_f64 (&d->c1);
+ float64x2_t c57 = vld1q_f64 (&d->c5);
+ float64x2_t c911 = vld1q_f64 (&d->c9);
+ float64x2_t c1315 = vld1q_f64 (&d->c13);
+ float64x2_t c1718 = vld1q_f64 (&d->c17);
+ float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, m, c1718, 0);
+ float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, m, c1315, 1);
+ float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, m, c1315, 0);
+ float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, m, c911, 1);
+ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, m, c911, 0);
+ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, m, c57, 1);
+ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, m, c57, 0);
+ float64x2_t p23 = vfmaq_laneq_f64 (d->c2, m, c13, 1);
+ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, m, c13, 0);
+ float64x2_t p = vfmaq_laneq_f64 (p1617, m2, c1718, 1);
+ p = vfmaq_f64 (p1415, m2, p);
+ p = vfmaq_f64 (p1213, m2, p);
+ p = vfmaq_f64 (p1011, m2, p);
+ p = vfmaq_f64 (p89, m2, p);
+ p = vfmaq_f64 (p67, m2, p);
+ p = vfmaq_f64 (p45, m2, p);
+ p = vfmaq_f64 (p23, m2, p);
+ return vfmaq_f64 (p01, m2, p);
+}
+
static inline float64x2_t
log1p_inline (float64x2_t x, const struct v_log1p_data *d)
{
- /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several
- modifications:
+ /* Helper for calculating log(x + 1):
- No special-case handling - this should be dealt with by the caller.
- - Pairwise Horner polynomial evaluation for improved accuracy.
- Optionally simulate the shortcut for k=0, used in the scalar routine,
- using v_sel, for improved accuracy when the argument to log1p is close to
- 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in
- the source of the caller before including this file.
- See v_log1pf_2u1.c for details of the algorithm. */
- float64x2_t m = vaddq_f64 (x, v_f64 (1));
+ using v_sel, for improved accuracy when the argument to log1p is close
+ to 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1
+ in the source of the caller before including this file. */
+ float64x2_t m = vaddq_f64 (x, v_f64 (1.0));
uint64x2_t mi = vreinterpretq_u64_f64 (m);
uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
@@ -74,14 +101,14 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d)
/* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
- float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
+ float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1.0));
/* Correction term c/m. */
- float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
+ float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1.0))), m);
#ifndef WANT_V_LOG1P_K0_SHORTCUT
-#error \
- "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+# error \
+ "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
#elif WANT_V_LOG1P_K0_SHORTCUT
/* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
that the approximation is solely the polynomial. */
@@ -92,11 +119,12 @@ log1p_inline (float64x2_t x, const struct v_log1p_data *d)
/* Approximate log1p(f) on the reduced input using a polynomial. */
float64x2_t f2 = vmulq_f64 (f, f);
- float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
+ float64x2_t p = eval_poly (f, f2, d);
/* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */
- float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
- float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
+ float64x2_t ln2 = vld1q_f64 (&d->ln2[0]);
+ float64x2_t ylo = vfmaq_laneq_f64 (cm, k, ln2, 1);
+ float64x2_t yhi = vfmaq_laneq_f64 (f, k, ln2, 0);
return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p);
}

216
glibc-RHEL-118273-21.patch Normal file
View File

@ -0,0 +1,216 @@
commit 569cfaaf4984ae70b23c61ee28a609b5aef93fea
Author: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Mon Dec 9 15:53:04 2024 +0000
AArch64: Improve codegen in AdvSIMD pow
Remove spurious ADRP. Improve memory access by shuffling constants and
using more indexed MLAs.
A few more optimisation with no impact on accuracy
- force fmas contraction
- switch from shift-aided rint to rint instruction
Between 1 and 5% throughput improvement on Neoverse
V1 depending on benchmark.
diff --git a/sysdeps/aarch64/fpu/pow_advsimd.c b/sysdeps/aarch64/fpu/pow_advsimd.c
index 3c91e3e183599e3e..81e134ac2f0bd2f5 100644
--- a/sysdeps/aarch64/fpu/pow_advsimd.c
+++ b/sysdeps/aarch64/fpu/pow_advsimd.c
@@ -22,9 +22,6 @@
/* Defines parameters of the approximation and scalar fallback. */
#include "finite_pow.h"
-#define VecSmallExp v_u64 (SmallExp)
-#define VecThresExp v_u64 (ThresExp)
-
#define VecSmallPowX v_u64 (SmallPowX)
#define VecThresPowX v_u64 (ThresPowX)
#define VecSmallPowY v_u64 (SmallPowY)
@@ -32,36 +29,48 @@
static const struct data
{
- float64x2_t log_poly[6];
- float64x2_t exp_poly[3];
- float64x2_t ln2_hi, ln2_lo;
- float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n, small_powx;
uint64x2_t inf;
+ float64x2_t small_powx;
+ uint64x2_t offset, mask;
+ uint64x2_t mask_sub_0, mask_sub_1;
+ float64x2_t log_c0, log_c2, log_c4, log_c5;
+ double log_c1, log_c3;
+ double ln2_lo, ln2_hi;
+ uint64x2_t small_exp, thres_exp;
+ double ln2_lo_n, ln2_hi_n;
+ double inv_ln2_n, exp_c2;
+ float64x2_t exp_c0, exp_c1;
} data = {
+ /* Power threshold. */
+ .inf = V2 (0x7ff0000000000000),
+ .small_powx = V2 (0x1p-126),
+ .offset = V2 (Off),
+ .mask = V2 (0xfffULL << 52),
+ .mask_sub_0 = V2 (1ULL << 52),
+ .mask_sub_1 = V2 (52ULL << 52),
/* Coefficients copied from v_pow_log_data.c
relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]
Coefficients are scaled to match the scaling during evaluation. */
- .log_poly
- = { V2 (0x1.555555555556p-2 * -2), V2 (-0x1.0000000000006p-2 * -2),
- V2 (0x1.999999959554ep-3 * 4), V2 (-0x1.555555529a47ap-3 * 4),
- V2 (0x1.2495b9b4845e9p-3 * -8), V2 (-0x1.0002b8b263fc3p-3 * -8) },
- .ln2_hi = V2 (0x1.62e42fefa3800p-1),
- .ln2_lo = V2 (0x1.ef35793c76730p-45),
+ .log_c0 = V2 (0x1.555555555556p-2 * -2),
+ .log_c1 = -0x1.0000000000006p-2 * -2,
+ .log_c2 = V2 (0x1.999999959554ep-3 * 4),
+ .log_c3 = -0x1.555555529a47ap-3 * 4,
+ .log_c4 = V2 (0x1.2495b9b4845e9p-3 * -8),
+ .log_c5 = V2 (-0x1.0002b8b263fc3p-3 * -8),
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
/* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549
(0.550 without fma) if |x| < ln2/512. */
- .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3),
- V2 (0x1.5555576a5adcep-5) },
- .shift = V2 (0x1.8p52), /* round to nearest int. without intrinsics. */
- .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */
- .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. */
- .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45),
- .small_powx = V2 (0x1p-126),
- .inf = V2 (0x7ff0000000000000)
+ .exp_c0 = V2 (0x1.fffffffffffd4p-2),
+ .exp_c1 = V2 (0x1.5555571d6ef9p-3),
+ .exp_c2 = 0x1.5555576a5adcep-5,
+ .small_exp = V2 (0x3c90000000000000),
+ .thres_exp = V2 (0x03f0000000000000),
+ .inv_ln2_n = 0x1.71547652b82fep8, /* N/ln2. */
+ .ln2_hi_n = 0x1.62e42fefc0000p-9, /* ln2/N. */
+ .ln2_lo_n = -0x1.c610ca86c3899p-45,
};
-#define A(i) data.log_poly[i]
-#define C(i) data.exp_poly[i]
-
/* This version implements an algorithm close to scalar pow but
- does not implement the trick in the exp's specialcase subroutine to avoid
double-rounding,
@@ -91,10 +100,9 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
- uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off));
- int64x2_t k
- = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */
- uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52)));
+ uint64x2_t tmp = vsubq_u64 (ix, d->offset);
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+ uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->mask));
float64x2_t z = vreinterpretq_f64_u64 (iz);
float64x2_t kd = vcvtq_f64_s64 (k);
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
@@ -105,9 +113,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc);
/* k*Ln2 + log(c) + r. */
- float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi);
+ float64x2_t ln2 = vld1q_f64 (&d->ln2_lo);
+ float64x2_t t1 = vfmaq_laneq_f64 (logc, kd, ln2, 1);
float64x2_t t2 = vaddq_f64 (t1, r);
- float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo);
+ float64x2_t lo1 = vfmaq_laneq_f64 (logctail, kd, ln2, 0);
float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r);
/* Evaluation is optimized assuming superscalar pipelined execution. */
float64x2_t ar = vmulq_f64 (v_f64 (-0.5), r);
@@ -118,9 +127,10 @@ v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r);
float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2);
/* p = log1p(r) - r - A[0]*r*r. */
- float64x2_t a56 = vfmaq_f64 (A (4), r, A (5));
- float64x2_t a34 = vfmaq_f64 (A (2), r, A (3));
- float64x2_t a12 = vfmaq_f64 (A (0), r, A (1));
+ float64x2_t odd_coeffs = vld1q_f64 (&d->log_c1);
+ float64x2_t a56 = vfmaq_f64 (d->log_c4, r, d->log_c5);
+ float64x2_t a34 = vfmaq_laneq_f64 (d->log_c2, r, odd_coeffs, 1);
+ float64x2_t a12 = vfmaq_laneq_f64 (d->log_c0, r, odd_coeffs, 0);
float64x2_t p = vfmaq_f64 (a34, ar2, a56);
p = vfmaq_f64 (a12, ar2, p);
p = vmulq_f64 (ar3, p);
@@ -140,28 +150,28 @@ exp_special_case (float64x2_t x, float64x2_t xtail)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */
static inline float64x2_t
-v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
+v_exp_inline (float64x2_t x, float64x2_t neg_xtail, const struct data *d)
{
/* Fallback to scalar exp_inline for all lanes if any lane
contains value of x s.t. |x| <= 2^-54 or >= 512. */
- uint64x2_t abstop
- = vshrq_n_u64 (vandq_u64 (vreinterpretq_u64_f64 (x), d->inf), 52);
- uint64x2_t uoflowx
- = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp);
+ uint64x2_t uoflowx = vcgeq_u64 (
+ vsubq_u64 (vreinterpretq_u64_f64 (vabsq_f64 (x)), d->small_exp),
+ d->thres_exp);
if (__glibc_unlikely (v_any_u64 (uoflowx)))
- return exp_special_case (x, xtail);
+ return exp_special_case (x, vnegq_f64 (neg_xtail));
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */
- float64x2_t z = vmulq_f64 (d->inv_ln2_n, x);
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
- float64x2_t kd = vaddq_f64 (z, d->shift);
- uint64x2_t ki = vreinterpretq_u64_f64 (kd);
- kd = vsubq_f64 (kd, d->shift);
- float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n);
- r = vfmsq_f64 (r, kd, d->ln2_lo_n);
+ float64x2_t exp_consts = vld1q_f64 (&d->inv_ln2_n);
+ float64x2_t z = vmulq_laneq_f64 (x, exp_consts, 0);
+ float64x2_t kd = vrndnq_f64 (z);
+ uint64x2_t ki = vreinterpretq_u64_s64 (vcvtaq_s64_f64 (z));
+ float64x2_t ln2_n = vld1q_f64 (&d->ln2_lo_n);
+ float64x2_t r = vfmsq_laneq_f64 (x, kd, ln2_n, 1);
+ r = vfmsq_laneq_f64 (r, kd, ln2_n, 0);
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
- r = vaddq_f64 (r, xtail);
+ r = vsubq_f64 (r, neg_xtail);
/* 2^(k/N) ~= scale. */
uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1));
uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS);
@@ -170,8 +180,8 @@ v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
sbits = vaddq_u64 (sbits, top);
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
float64x2_t r2 = vmulq_f64 (r, r);
- float64x2_t tmp = vfmaq_f64 (C (1), r, C (2));
- tmp = vfmaq_f64 (C (0), r, tmp);
+ float64x2_t tmp = vfmaq_laneq_f64 (d->exp_c1, r, exp_consts, 1);
+ tmp = vfmaq_f64 (d->exp_c0, r, tmp);
tmp = vfmaq_f64 (r, r2, tmp);
float64x2_t scale = vreinterpretq_f64_u64 (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
@@ -230,8 +240,8 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
{
/* Normalize subnormal x so exponent becomes negative. */
uint64x2_t vix_norm = vreinterpretq_u64_f64 (
- vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (v_u64 (1ULL << 52)))));
- vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52));
+ vabsq_f64 (vmulq_f64 (x, vcvtq_f64_u64 (d->mask_sub_0))));
+ vix_norm = vsubq_u64 (vix_norm, d->mask_sub_1);
vix = vbslq_u64 (sub_x, vix_norm, vix);
}
}
@@ -242,8 +252,7 @@ float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
/* Vector Exp(y_loghi, y_loglo). */
float64x2_t vehi = vmulq_f64 (y, vhi);
- float64x2_t velo = vmulq_f64 (y, vlo);
float64x2_t vemi = vfmsq_f64 (vehi, y, vhi);
- velo = vsubq_f64 (velo, vemi);
- return v_exp_inline (vehi, velo, d);
+ float64x2_t neg_velo = vfmsq_f64 (vemi, y, vlo);
+ return v_exp_inline (vehi, neg_velo, d);
}

501
glibc-RHEL-118273-22.patch Normal file
View File

@ -0,0 +1,501 @@
commit cff9648d0b50d19cdaf685f6767add040d4e1a8e
Author: Joana Cruz <Joana.Cruz@arm.com>
Date: Tue Dec 17 14:50:33 2024 +0000
AArch64: Improve codegen of AdvSIMD expf family
Load the polynomial evaluation coefficients into 2 vectors and use lanewise MLAs.
Also use intrinsics instead of native operations.
expf: 3% improvement in throughput microbenchmark on Neoverse V1, exp2f: 5%,
exp10f: 13%, coshf: 14%.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/coshf_advsimd.c b/sysdeps/aarch64/fpu/coshf_advsimd.c
index c1ab4923b826569b..cd5c86652129ea9c 100644
--- a/sysdeps/aarch64/fpu/coshf_advsimd.c
+++ b/sysdeps/aarch64/fpu/coshf_advsimd.c
@@ -23,19 +23,27 @@
static const struct data
{
struct v_expf_data expf_consts;
- uint32x4_t tiny_bound, special_bound;
+ uint32x4_t tiny_bound;
+ float32x4_t bound;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t special_bound;
+#endif
} data = {
.expf_consts = V_EXPF_DATA,
.tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */
/* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
+ .bound = V4 (0x1.5a92d8p+6),
+#if WANT_SIMD_EXCEPT
.special_bound = V4 (0x42ad496c),
+#endif
};
#if !WANT_SIMD_EXCEPT
static float32x4_t NOINLINE VPCS_ATTR
-special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+special_case (float32x4_t x, float32x4_t half_t, float32x4_t half_over_t,
+ uint32x4_t special)
{
- return v_call_f32 (coshf, x, y, special);
+ return v_call_f32 (coshf, x, vaddq_f32 (half_t, half_over_t), special);
}
#endif
@@ -47,14 +55,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
- float32x4_t ax = vabsq_f32 (x);
- uint32x4_t iax = vreinterpretq_u32_f32 (ax);
- uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
-
#if WANT_SIMD_EXCEPT
/* If fp exceptions are to be triggered correctly, fall back to the scalar
variant for all inputs if any input is a special value or above the bound
at which expf overflows. */
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
if (__glibc_unlikely (v_any_u32 (special)))
return v_call_f32 (coshf, x, x, v_u32 (-1));
@@ -63,10 +70,13 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
input to 0, which will generate no exceptions. */
if (__glibc_unlikely (v_any_u32 (tiny)))
ax = v_zerofy_f32 (ax, tiny);
+ float32x4_t t = v_expf_inline (ax, &d->expf_consts);
+#else
+ uint32x4_t special = vcageq_f32 (x, d->bound);
+ float32x4_t t = v_expf_inline (x, &d->expf_consts);
#endif
/* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
- float32x4_t t = v_expf_inline (ax, &d->expf_consts);
float32x4_t half_t = vmulq_n_f32 (t, 0.5);
float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t);
@@ -75,7 +85,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t));
#else
if (__glibc_unlikely (v_any_u32 (special)))
- return special_case (x, vaddq_f32 (half_t, half_over_t), special);
+ return special_case (x, half_t, half_over_t, special);
#endif
return vaddq_f32 (half_t, half_over_t);
diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c
index cf53e73290fcedb6..55d9cd83f2968ab9 100644
--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c
@@ -18,16 +18,15 @@
<https://www.gnu.org/licenses/>. */
#include "v_math.h"
-#include "poly_advsimd_f32.h"
#define ScaleBound 192.0f
static const struct data
{
- float32x4_t poly[5];
- float log10_2_and_inv[4];
- float32x4_t shift;
-
+ float32x4_t c0, c1, c3;
+ float log10_2_high, log10_2_low, c2, c4;
+ float32x4_t inv_log10_2, special_bound;
+ uint32x4_t exponent_bias, special_offset, special_bias;
#if !WANT_SIMD_EXCEPT
float32x4_t scale_thresh;
#endif
@@ -37,19 +36,24 @@ static const struct data
rel error: 0x1.89dafa3p-24
abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
maxerr: 1.85943 +0.5 ulp. */
- .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
- V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
- .shift = V4 (0x1.8p23f),
-
- /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */
- .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
+ .c0 = V4 (0x1.26bb16p+1f),
+ .c1 = V4 (0x1.5350d2p+1f),
+ .c2 = 0x1.04744ap+1f,
+ .c3 = V4 (0x1.2d8176p+0f),
+ .c4 = 0x1.12b41ap-1f,
+ .inv_log10_2 = V4 (0x1.a934fp+1),
+ .log10_2_high = 0x1.344136p-2,
+ .log10_2_low = 0x1.ec10cp-27,
+ /* rint (log2 (2^127 / (1 + sqrt (2)))). */
+ .special_bound = V4 (126.0f),
+ .exponent_bias = V4 (0x3f800000),
+ .special_offset = V4 (0x82000000),
+ .special_bias = V4 (0x7f000000),
#if !WANT_SIMD_EXCEPT
.scale_thresh = V4 (ScaleBound)
#endif
};
-#define ExponentBias v_u32 (0x3f800000)
-
#if WANT_SIMD_EXCEPT
# define SpecialBound 38.0f /* rint(log10(2^127)). */
@@ -67,17 +71,15 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
#else
-# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
+# define SpecialBound 126.0f
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
float32x4_t scale, const struct data *d)
{
/* 2^n may overflow, break it up into s1*s2. */
- uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
- float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+ uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+ float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
float32x4_t r2 = vmulq_f32 (s1, s1);
@@ -112,23 +114,23 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x)
/* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
with poly(r) in [1/sqrt(2), sqrt(2)] and
x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */
- float32x4_t log10_2_and_inv = vld1q_f32 (d->log10_2_and_inv);
- float32x4_t z = vfmaq_laneq_f32 (d->shift, x, log10_2_and_inv, 0);
- float32x4_t n = vsubq_f32 (z, d->shift);
- float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_and_inv, 1);
- r = vfmsq_laneq_f32 (r, n, log10_2_and_inv, 2);
- uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+ float32x4_t log10_2_c24 = vld1q_f32 (&d->log10_2_high);
+ float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_log10_2));
+ float32x4_t r = vfmsq_laneq_f32 (x, n, log10_2_c24, 0);
+ r = vfmaq_laneq_f32 (r, n, log10_2_c24, 1);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (n)), 23);
- float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
#if !WANT_SIMD_EXCEPT
- uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound));
+ uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
#endif
float32x4_t r2 = vmulq_f32 (r, r);
- float32x4_t poly
- = vfmaq_f32 (vmulq_f32 (r, d->poly[0]),
- v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2);
+ float32x4_t p12 = vfmaq_laneq_f32 (d->c1, r, log10_2_c24, 2);
+ float32x4_t p34 = vfmaq_laneq_f32 (d->c3, r, log10_2_c24, 3);
+ float32x4_t p14 = vfmaq_f32 (p12, r2, p34);
+ float32x4_t poly = vfmaq_f32 (vmulq_f32 (r, d->c0), p14, r2);
if (__glibc_unlikely (v_any_u32 (cmp)))
#if WANT_SIMD_EXCEPT
diff --git a/sysdeps/aarch64/fpu/exp2f_advsimd.c b/sysdeps/aarch64/fpu/exp2f_advsimd.c
index 69e0b193a1a91249..a4220da63c624490 100644
--- a/sysdeps/aarch64/fpu/exp2f_advsimd.c
+++ b/sysdeps/aarch64/fpu/exp2f_advsimd.c
@@ -21,24 +21,28 @@
static const struct data
{
- float32x4_t poly[5];
- uint32x4_t exponent_bias;
+ float32x4_t c1, c3;
+ uint32x4_t exponent_bias, special_offset, special_bias;
#if !WANT_SIMD_EXCEPT
- float32x4_t special_bound, scale_thresh;
+ float32x4_t scale_thresh, special_bound;
#endif
+ float c0, c2, c4, zero;
} data = {
/* maxerr: 1.962 ulp. */
- .poly = { V4 (0x1.59977ap-10f), V4 (0x1.3ce9e4p-7f), V4 (0x1.c6bd32p-5f),
- V4 (0x1.ebf9bcp-3f), V4 (0x1.62e422p-1f) },
+ .c0 = 0x1.59977ap-10f,
+ .c1 = V4 (0x1.3ce9e4p-7f),
+ .c2 = 0x1.c6bd32p-5f,
+ .c3 = V4 (0x1.ebf9bcp-3f),
+ .c4 = 0x1.62e422p-1f,
.exponent_bias = V4 (0x3f800000),
+ .special_offset = V4 (0x82000000),
+ .special_bias = V4 (0x7f000000),
#if !WANT_SIMD_EXCEPT
.special_bound = V4 (126.0f),
.scale_thresh = V4 (192.0f),
#endif
};
-#define C(i) d->poly[i]
-
#if WANT_SIMD_EXCEPT
# define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */
@@ -55,16 +59,13 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
#else
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
-
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
float32x4_t scale, const struct data *d)
{
/* 2^n may overflow, break it up into s1*s2. */
- uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
- float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+ uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+ float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
float32x4_t r2 = vmulq_f32 (s1, s1);
@@ -80,13 +81,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
- float32x4_t n, r, r2, scale, p, q, poly;
- uint32x4_t cmp, e;
#if WANT_SIMD_EXCEPT
/* asuint(|x|) - TinyBound >= BigBound - TinyBound. */
uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
- cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound);
+ uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ia, TinyBound), SpecialBound);
float32x4_t xm = x;
/* If any lanes are special, mask them with 1 and retain a copy of x to allow
special_case to fix special lanes later. This is only necessary if fenv
@@ -95,23 +94,24 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp2) (float32x4_t x)
x = vbslq_f32 (cmp, v_f32 (1), x);
#endif
- /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
- x = n + r, with r in [-1/2, 1/2]. */
- n = vrndaq_f32 (x);
- r = vsubq_f32 (x, n);
- e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23);
- scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
+ /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = n + r, with r in [-1/2, 1/2]. */
+ float32x4_t n = vrndaq_f32 (x);
+ float32x4_t r = vsubq_f32 (x, n);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (x)), 23);
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
#if !WANT_SIMD_EXCEPT
- cmp = vcagtq_f32 (n, d->special_bound);
+ uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
#endif
- r2 = vmulq_f32 (r, r);
- p = vfmaq_f32 (C (1), C (0), r);
- q = vfmaq_f32 (C (3), C (2), r);
+ float32x4_t c024 = vld1q_f32 (&d->c0);
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t p = vfmaq_laneq_f32 (d->c1, r, c024, 0);
+ float32x4_t q = vfmaq_laneq_f32 (d->c3, r, c024, 1);
q = vfmaq_f32 (q, p, r2);
- p = vmulq_f32 (C (4), r);
- poly = vfmaq_f32 (p, q, r2);
+ p = vmulq_laneq_f32 (r, c024, 2);
+ float32x4_t poly = vfmaq_f32 (p, q, r2);
if (__glibc_unlikely (v_any_u32 (cmp)))
#if WANT_SIMD_EXCEPT
diff --git a/sysdeps/aarch64/fpu/expf_advsimd.c b/sysdeps/aarch64/fpu/expf_advsimd.c
index 5c9cb726205ece6e..70f137e2e5b46207 100644
--- a/sysdeps/aarch64/fpu/expf_advsimd.c
+++ b/sysdeps/aarch64/fpu/expf_advsimd.c
@@ -21,20 +21,25 @@
static const struct data
{
- float32x4_t poly[5];
- float32x4_t inv_ln2, ln2_hi, ln2_lo;
- uint32x4_t exponent_bias;
+ float32x4_t c1, c3, c4, inv_ln2;
+ float ln2_hi, ln2_lo, c0, c2;
+ uint32x4_t exponent_bias, special_offset, special_bias;
#if !WANT_SIMD_EXCEPT
float32x4_t special_bound, scale_thresh;
#endif
} data = {
/* maxerr: 1.45358 +0.5 ulp. */
- .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f),
- V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) },
+ .c0 = 0x1.0e4020p-7f,
+ .c1 = V4 (0x1.573e2ep-5f),
+ .c2 = 0x1.555e66p-3f,
+ .c3 = V4 (0x1.fffdb6p-2f),
+ .c4 = V4 (0x1.ffffecp-1f),
.inv_ln2 = V4 (0x1.715476p+0f),
- .ln2_hi = V4 (0x1.62e4p-1f),
- .ln2_lo = V4 (0x1.7f7d1cp-20f),
+ .ln2_hi = 0x1.62e4p-1f,
+ .ln2_lo = 0x1.7f7d1cp-20f,
.exponent_bias = V4 (0x3f800000),
+ .special_offset = V4 (0x82000000),
+ .special_bias = V4 (0x7f000000),
#if !WANT_SIMD_EXCEPT
.special_bound = V4 (126.0f),
.scale_thresh = V4 (192.0f),
@@ -59,19 +64,17 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
#else
-# define SpecialOffset v_u32 (0x82000000)
-# define SpecialBias v_u32 (0x7f000000)
-
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
float32x4_t scale, const struct data *d)
{
/* 2^n may overflow, break it up into s1*s2. */
- uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
- float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+ uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
+ float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
float32x4_t r2 = vmulq_f32 (s1, s1);
+ // (s2 + p*s2)*s1 = s2(p+1)s1
float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1);
/* Similar to r1 but avoids double rounding in the subnormal range. */
float32x4_t r0 = vfmaq_f32 (scale, poly, scale);
@@ -84,12 +87,11 @@ special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
- float32x4_t n, r, r2, scale, p, q, poly;
- uint32x4_t cmp, e;
+ float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi);
#if WANT_SIMD_EXCEPT
/* asuint(x) - TinyBound >= BigBound - TinyBound. */
- cmp = vcgeq_u32 (
+ uint32x4_t cmp = vcgeq_u32 (
vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
TinyBound),
SpecialBound);
@@ -103,22 +105,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
- n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2));
- r = vfmsq_f32 (x, n, d->ln2_hi);
- r = vfmsq_f32 (r, n, d->ln2_lo);
- e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
- scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
+ float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2));
+ float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0);
+ r = vfmsq_laneq_f32 (r, n, ln2_c02, 1);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
#if !WANT_SIMD_EXCEPT
- cmp = vcagtq_f32 (n, d->special_bound);
+ uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
#endif
- r2 = vmulq_f32 (r, r);
- p = vfmaq_f32 (C (1), C (0), r);
- q = vfmaq_f32 (C (3), C (2), r);
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2);
+ float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3);
q = vfmaq_f32 (q, p, r2);
- p = vmulq_f32 (C (4), r);
- poly = vfmaq_f32 (p, q, r2);
+ p = vmulq_f32 (d->c4, r);
+ float32x4_t poly = vfmaq_f32 (p, q, r2);
if (__glibc_unlikely (v_any_u32 (cmp)))
#if WANT_SIMD_EXCEPT
diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h
index 08b06e0a6b34b4f4..eacd2af24161fe3a 100644
--- a/sysdeps/aarch64/fpu/v_expf_inline.h
+++ b/sysdeps/aarch64/fpu/v_expf_inline.h
@@ -24,50 +24,45 @@
struct v_expf_data
{
- float32x4_t poly[5];
- float32x4_t shift;
- float invln2_and_ln2[4];
+ float ln2_hi, ln2_lo, c0, c2;
+ float32x4_t inv_ln2, c1, c3, c4;
+ /* asuint(1.0f). */
+ uint32x4_t exponent_bias;
};
/* maxerr: 1.45358 +0.5 ulp. */
#define V_EXPF_DATA \
{ \
- .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \
- V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \
- .shift = V4 (0x1.8p23f), \
- .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ .c0 = 0x1.0e4020p-7f, .c1 = V4 (0x1.573e2ep-5f), .c2 = 0x1.555e66p-3f, \
+ .c3 = V4 (0x1.fffdb6p-2f), .c4 = V4 (0x1.ffffecp-1f), \
+ .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \
+ .inv_ln2 = V4 (0x1.715476p+0f), .exponent_bias = V4 (0x3f800000), \
}
-#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */
-#define C(i) d->poly[i]
-
static inline float32x4_t
v_expf_inline (float32x4_t x, const struct v_expf_data *d)
{
- /* Helper routine for calculating exp(x).
+ /* Helper routine for calculating exp(ax).
Copied from v_expf.c, with all special-case handling removed - the
calling routine should handle special values if required. */
- /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
- x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
- float32x4_t n, r, z;
- float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
- z = vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0);
- n = vsubq_f32 (z, d->shift);
- r = vfmsq_laneq_f32 (x, n, invln2_and_ln2, 1);
- r = vfmsq_laneq_f32 (r, n, invln2_and_ln2, 2);
- uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
- float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+ /* exp(ax) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ ax = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+ float32x4_t ax = vabsq_f32 (x);
+ float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi);
+ float32x4_t n = vrndaq_f32 (vmulq_f32 (ax, d->inv_ln2));
+ float32x4_t r = vfmsq_laneq_f32 (ax, n, ln2_c02, 0);
+ r = vfmsq_laneq_f32 (r, n, ln2_c02, 1);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));
/* Custom order-4 Estrin avoids building high order monomial. */
float32x4_t r2 = vmulq_f32 (r, r);
- float32x4_t p, q, poly;
- p = vfmaq_f32 (C (1), C (0), r);
- q = vfmaq_f32 (C (3), C (2), r);
+ float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2);
+ float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3);
q = vfmaq_f32 (q, p, r2);
- p = vmulq_f32 (C (4), r);
- poly = vfmaq_f32 (p, q, r2);
+ p = vmulq_f32 (d->c4, r);
+ float32x4_t poly = vfmaq_f32 (p, q, r2);
return vfmaq_f32 (scale, poly, scale);
}
-
#endif

362
glibc-RHEL-118273-23.patch Normal file
View File

@ -0,0 +1,362 @@
commit 91c1fadba338752bf514cd4cca057b27b1b10eed
Author: Yat Long Poon <yatlong.poon@arm.com>
Date: Fri Jan 3 19:09:05 2025 +0000
AArch64: Improve codegen for SVE log1pf users
Reduce memory access by using lanewise MLA and reduce number of MOVPRFXs.
Move log1pf implementation to inline helper function.
Speedup on Neoverse V1 for log1pf (10%), acoshf (-1%), atanhf (2%), asinhf (2%).
Conflicts:
sysdeps/aarch64/fpu/log1pf_sve.c
(Fixup context to apply without out-of-scope dependency 751a5502)
diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c
index 2110894e629500be..491365e24d692f0f 100644
--- a/sysdeps/aarch64/fpu/acoshf_sve.c
+++ b/sysdeps/aarch64/fpu/acoshf_sve.c
@@ -17,23 +17,26 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include "sv_math.h"
+#include "sv_log1pf_inline.h"
+
#define One 0x3f800000
#define Thres 0x20000000 /* asuint(0x1p64) - One. */
-#include "sv_log1pf_inline.h"
-
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
{
+ svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
+ svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ());
return sv_call_f32 (acoshf, x, y, special);
}
/* Single-precision SVE acosh(x) routine. Implements the same algorithm as
vector acoshf and log1p.
- Maximum error is 2.78 ULPs:
- SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
- want 0x1.f45b3cp-4. */
+ Maximum error is 2.47 ULPs:
+ SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
+ want 0x1.e435a2p-4. */
svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
{
svuint32_t ix = svreinterpret_u32 (x);
@@ -41,9 +44,9 @@ svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
- svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
+ svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u));
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, y, special);
- return y;
+ return special_case (xm1, tmp, special);
+ return sv_log1pf_inline (tmp, pg);
}
diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c
index d85c3a685c0b83ff..b7f253bf32fb9478 100644
--- a/sysdeps/aarch64/fpu/asinhf_sve.c
+++ b/sysdeps/aarch64/fpu/asinhf_sve.c
@@ -20,20 +20,23 @@
#include "sv_math.h"
#include "sv_log1pf_inline.h"
-#define BigBound (0x5f800000) /* asuint(0x1p64). */
+#define BigBound 0x5f800000 /* asuint(0x1p64). */
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
{
+ svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+ y = svreinterpret_f32 (
+ svorr_x (svptrue_b32 (), sign, svreinterpret_u32 (y)));
return sv_call_f32 (asinhf, x, y, special);
}
/* Single-precision SVE asinh(x) routine. Implements the same algorithm as
vector asinhf and log1p.
- Maximum error is 2.48 ULPs:
- SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4
- want 0x1.ffbbb8p-4. */
+ Maximum error is 1.92 ULPs:
+ SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2
+ want -0x1.fd0bc8p-2. */
svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
{
svfloat32_t ax = svabs_x (pg, x);
@@ -49,8 +52,6 @@ svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
= sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg);
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (
- x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))),
- special);
+ return special_case (iax, sign, y, special);
return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y)));
}
diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
index dae83041ef7157f0..2d3005bbc88393ec 100644
--- a/sysdeps/aarch64/fpu/atanhf_sve.c
+++ b/sysdeps/aarch64/fpu/atanhf_sve.c
@@ -17,21 +17,25 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include "sv_math.h"
#include "sv_log1pf_inline.h"
#define One (0x3f800000)
#define Half (0x3f000000)
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
+ svfloat32_t y, svbool_t special)
{
+ svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
+ y = svmul_x (svptrue_b32 (), halfsign, y);
return sv_call_f32 (atanhf, x, y, special);
}
/* Approximation for vector single-precision atanh(x) using modified log1p.
- The maximum error is 2.28 ULP:
- _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
- want 0x1.ffbbb6p-5. */
+ The maximum error is 1.99 ULP:
+ _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5
+ want 0x1.f1f4f6p-5. */
svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
{
svfloat32_t ax = svabs_x (pg, x);
@@ -48,7 +52,7 @@ svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
y = sv_log1pf_inline (y, pg);
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svmul_x (pg, halfsign, y), special);
+ return special_case (iax, sign, halfsign, y, special);
return svmul_x (pg, halfsign, y);
}
diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c
index f645cc997e430bcb..4f17c44e2d96039a 100644
--- a/sysdeps/aarch64/fpu/log1pf_sve.c
+++ b/sysdeps/aarch64/fpu/log1pf_sve.c
@@ -18,30 +18,13 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f32.h"
-
-static const struct data
-{
- float poly[8];
- float ln2, exp_bias;
- uint32_t four, three_quarters;
-} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as
- this can be fmov-ed directly instead of including it in
- the main load-and-mla polynomial schedule. */
- 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
- -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
- 0x1.abcb6p-4f, -0x1.6f0d5ep-5f},
- .ln2 = 0x1.62e43p-1f,
- .exp_bias = 0x1p-23f,
- .four = 0x40800000,
- .three_quarters = 0x3f400000};
-
-#define SignExponentMask 0xff800000
+#include "sv_log1pf_inline.h"
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t x, svbool_t special)
{
- return sv_call_f32 (log1pf, x, y, special);
+ return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
+ special);
}
/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
@@ -50,51 +33,12 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
want 0x1.9f323ep-2. */
svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
{
- const struct data *d = ptr_barrier (&data);
/* x < -1, Inf/Nan. */
svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
special = svorn_z (pg, special, svcmpge (pg, x, -1));
- /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
- is in [-0.25, 0.5]):
- log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
-
- We approximate log1p(m) with a polynomial, then scale by
- k*log(2). Instead of doing this directly, we use an intermediate
- scale factor s = 4*k*log(2) to ensure the scale is representable
- as a normalised fp32 number. */
- svfloat32_t m = svadd_x (pg, x, 1);
-
- /* Choose k to scale x to the range [-1/4, 1/2]. */
- svint32_t k
- = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
- sv_s32 (SignExponentMask));
-
- /* Scale x by exponent manipulation. */
- svfloat32_t m_scale = svreinterpret_f32 (
- svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
-
- /* Scale up to ensure that the scale factor is representable as normalised
- fp32 number, and scale m down accordingly. */
- svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
- m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25));
-
- /* Evaluate polynomial on reduced interval. */
- svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale),
- ms4 = svmul_x (pg, ms2, ms2);
- svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly);
- p = svmad_x (pg, m_scale, p, -0.5);
- p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
-
- /* The scale factor to be applied back at the end - by multiplying float(k)
- by 2^-23 we get the unbiased exponent of k. */
- svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias);
-
- /* Apply the scaling back. */
- svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2);
-
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, y, special);
+ return special_case (x, special);
- return y;
+ return sv_log1pf_inline (x, pg);
}
diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
index b94b2da055a6c59b..850297d61556740c 100644
--- a/sysdeps/aarch64/fpu/sv_log1pf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
@@ -22,55 +22,76 @@
#include "sv_math.h"
#include "vecmath_config.h"
-#include "poly_sve_f32.h"
+
+#define SignExponentMask 0xff800000
static const struct sv_log1pf_data
{
- float32_t poly[9];
- float32_t ln2;
- float32_t scale_back;
+ float c0, c2, c4, c6;
+ float c1, c3, c5, c7;
+ float ln2, exp_bias, quarter;
+ uint32_t four, three_quarters;
} sv_log1pf_data = {
- /* Polynomial generated using FPMinimax in [-0.25, 0.5]. */
- .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
- -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
- -0x1.6f0d5ep-5f },
- .scale_back = 0x1.0p-23f,
- .ln2 = 0x1.62e43p-1f,
+ /* Do not store first term of polynomial, which is -0.5, as
+ this can be fmov-ed directly instead of including it in
+ the main load-and-mla polynomial schedule. */
+ .c0 = 0x1.5555aap-2f, .c1 = -0x1.000038p-2f, .c2 = 0x1.99675cp-3f,
+ .c3 = -0x1.54ef78p-3f, .c4 = 0x1.28a1f4p-3f, .c5 = -0x1.0da91p-3f,
+ .c6 = 0x1.abcb6p-4f, .c7 = -0x1.6f0d5ep-5f, .ln2 = 0x1.62e43p-1f,
+ .exp_bias = 0x1p-23f, .quarter = 0x1p-2f, .four = 0x40800000,
+ .three_quarters = 0x3f400000,
};
-static inline svfloat32_t
-eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg)
-{
- svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1]));
- svfloat32_t m2 = svmul_x (pg, m, m);
- svfloat32_t q = svmla_x (pg, m, m2, p_12);
- svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2);
- p = svmul_x (pg, m2, p);
-
- return svmla_x (pg, q, m2, p);
-}
-
static inline svfloat32_t
sv_log1pf_inline (svfloat32_t x, svbool_t pg)
{
const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
- svfloat32_t m = svadd_x (pg, x, 1.0f);
-
- svint32_t ks = svsub_x (pg, svreinterpret_s32 (m),
- svreinterpret_s32 (svdup_f32 (0.75f)));
- ks = svand_x (pg, ks, 0xff800000);
- svuint32_t k = svreinterpret_u32 (ks);
- svfloat32_t s = svreinterpret_f32 (
- svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k));
-
- svfloat32_t m_scale
- = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k));
- m_scale
- = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s));
- svfloat32_t p = eval_poly (m_scale, d->poly, pg);
- svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back);
- return svmla_x (pg, p, scale_back, d->ln2);
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+ We approximate log1p(m) with a polynomial, then scale by
+ k*log(2). Instead of doing this directly, we use an intermediate
+ scale factor s = 4*k*log(2) to ensure the scale is representable
+ as a normalised fp32 number. */
+ svfloat32_t m = svadd_x (pg, x, 1);
+
+ /* Choose k to scale x to the range [-1/4, 1/2]. */
+ svint32_t k
+ = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
+ sv_s32 (SignExponentMask));
+
+ /* Scale x by exponent manipulation. */
+ svfloat32_t m_scale = svreinterpret_f32 (
+ svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number, and scale m down accordingly. */
+ svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
+ svfloat32_t fconst = svld1rq_f32 (svptrue_b32 (), &d->ln2);
+ m_scale = svadd_x (pg, m_scale, svmla_lane_f32 (sv_f32 (-1), s, fconst, 2));
+
+ /* Evaluate polynomial on reduced interval. */
+ svfloat32_t ms2 = svmul_x (svptrue_b32 (), m_scale, m_scale);
+
+ svfloat32_t c1357 = svld1rq_f32 (svptrue_b32 (), &d->c1);
+ svfloat32_t p01 = svmla_lane_f32 (sv_f32 (d->c0), m_scale, c1357, 0);
+ svfloat32_t p23 = svmla_lane_f32 (sv_f32 (d->c2), m_scale, c1357, 1);
+ svfloat32_t p45 = svmla_lane_f32 (sv_f32 (d->c4), m_scale, c1357, 2);
+ svfloat32_t p67 = svmla_lane_f32 (sv_f32 (d->c6), m_scale, c1357, 3);
+
+ svfloat32_t p = svmla_x (pg, p45, p67, ms2);
+ p = svmla_x (pg, p23, p, ms2);
+ p = svmla_x (pg, p01, p, ms2);
+
+ p = svmad_x (pg, m_scale, p, -0.5);
+ p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
+
+ /* The scale factor to be applied back at the end - by multiplying float(k)
+ by 2^-23 we get the unbiased exponent of k. */
+ svfloat32_t scale_back = svmul_lane_f32 (svcvt_f32_x (pg, k), fconst, 1);
+ return svmla_lane_f32 (p, scale_back, fconst, 0);
}
#endif

258
glibc-RHEL-118273-24.patch Normal file
View File

@ -0,0 +1,258 @@
commit 140b985e5a2071000122b3cb63ebfe88cf21dd29
Author: Luna Lamb <luna.lamb@arm.com>
Date: Fri Jan 3 19:00:12 2025 +0000
AArch64: Improve codegen in AdvSIMD asinh
Improves memory access and removes spills.
Load the polynomial evaluation coefficients into 2 vectors and use lanewise
MLAs. Reduces MOVs 6->3, LDR 11->5, STR/STP 2->0, ADRP 3->2.
diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c
index 6207e7da9531f48d..2739f98b390edca7 100644
--- a/sysdeps/aarch64/fpu/asinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinh_advsimd.c
@@ -20,41 +20,71 @@
#include "v_math.h"
#include "poly_advsimd_f64.h"
-#define A(i) v_f64 (__v_log_data.poly[i])
-#define N (1 << V_LOG_TABLE_BITS)
-#define IndexMask (N - 1)
-
const static struct data
{
- float64x2_t poly[18];
- uint64x2_t off, huge_bound, abs_mask;
- float64x2_t ln2, tiny_bound;
+ uint64x2_t huge_bound, abs_mask, off, mask;
+#if WANT_SIMD_EXCEPT
+ float64x2_t tiny_bound;
+#endif
+ float64x2_t lc0, lc2;
+ double lc1, lc3, ln2, lc4;
+
+ float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c17;
+ double c1, c3, c5, c7, c9, c11, c13, c15;
+
} data = {
- .off = V2 (0x3fe6900900000000),
- .ln2 = V2 (0x1.62e42fefa39efp-1),
- .huge_bound = V2 (0x5fe0000000000000),
+
+#if WANT_SIMD_EXCEPT
.tiny_bound = V2 (0x1p-26),
- .abs_mask = V2 (0x7fffffffffffffff),
+#endif
/* Even terms of polynomial s.t. asinh(x) is approximated by
asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...).
Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). */
- .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4),
- V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6),
- V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6),
- V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7),
- V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7),
- V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8),
- V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9),
- V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12),
- V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) },
+
+ .c0 = V2 (-0x1.55555555554a7p-3),
+ .c1 = 0x1.3333333326c7p-4,
+ .c2 = V2 (-0x1.6db6db68332e6p-5),
+ .c3 = 0x1.f1c71b26fb40dp-6,
+ .c4 = V2 (-0x1.6e8b8b654a621p-6),
+ .c5 = 0x1.1c4daa9e67871p-6,
+ .c6 = V2 (-0x1.c9871d10885afp-7),
+ .c7 = 0x1.7a16e8d9d2ecfp-7,
+ .c8 = V2 (-0x1.3ddca533e9f54p-7),
+ .c9 = 0x1.0becef748dafcp-7,
+ .c10 = V2 (-0x1.b90c7099dd397p-8),
+ .c11 = 0x1.541f2bb1ffe51p-8,
+ .c12 = V2 (-0x1.d217026a669ecp-9),
+ .c13 = 0x1.0b5c7977aaf7p-9,
+ .c14 = V2 (-0x1.e0f37daef9127p-11),
+ .c15 = 0x1.388b5fe542a6p-12,
+ .c16 = V2 (-0x1.021a48685e287p-14),
+ .c17 = V2 (0x1.93d4ba83d34dap-18),
+
+ .lc0 = V2 (-0x1.ffffffffffff7p-2),
+ .lc1 = 0x1.55555555170d4p-2,
+ .lc2 = V2 (-0x1.0000000399c27p-2),
+ .lc3 = 0x1.999b2e90e94cap-3,
+ .lc4 = -0x1.554e550bd501ep-3,
+ .ln2 = 0x1.62e42fefa39efp-1,
+
+ .off = V2 (0x3fe6900900000000),
+ .huge_bound = V2 (0x5fe0000000000000),
+ .abs_mask = V2 (0x7fffffffffffffff),
+ .mask = V2 (0xfffULL << 52),
};
static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+special_case (float64x2_t x, float64x2_t y, uint64x2_t abs_mask,
+ uint64x2_t special)
{
+ /* Copy sign. */
+ y = vbslq_f64 (abs_mask, y, x);
return v_call_f64 (asinh, x, y, special);
}
+#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
+
struct entry
{
float64x2_t invc;
@@ -76,27 +106,34 @@ lookup (uint64x2_t i)
}
static inline float64x2_t
-log_inline (float64x2_t x, const struct data *d)
+log_inline (float64x2_t xm, const struct data *d)
{
- /* Double-precision vector log, copied from ordinary vector log with some
- cosmetic modification and special-cases removed. */
- uint64x2_t ix = vreinterpretq_u64_f64 (x);
- uint64x2_t tmp = vsubq_u64 (ix, d->off);
- int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
- uint64x2_t iz
- = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52)));
+
+ uint64x2_t u = vreinterpretq_u64_f64 (xm);
+ uint64x2_t u_off = vsubq_u64 (u, d->off);
+
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (u_off), 52);
+ uint64x2_t iz = vsubq_u64 (u, vandq_u64 (u_off, d->mask));
float64x2_t z = vreinterpretq_f64_u64 (iz);
- struct entry e = lookup (tmp);
+
+ struct entry e = lookup (u_off);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
float64x2_t kd = vcvtq_f64_s64 (k);
- float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
+
+ /* hi = r + log(c) + k*Ln2. */
+ float64x2_t ln2_and_lc4 = vld1q_f64 (&d->ln2);
+ float64x2_t hi = vfmaq_laneq_f64 (vaddq_f64 (e.logc, r), kd, ln2_and_lc4, 0);
+
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ float64x2_t odd_coeffs = vld1q_f64 (&d->lc1);
float64x2_t r2 = vmulq_f64 (r, r);
- float64x2_t y = vfmaq_f64 (A (2), A (3), r);
- float64x2_t p = vfmaq_f64 (A (0), A (1), r);
- y = vfmaq_f64 (y, A (4), r2);
- y = vfmaq_f64 (p, y, r2);
- y = vfmaq_f64 (hi, y, r2);
- return y;
+ float64x2_t y = vfmaq_laneq_f64 (d->lc2, r, odd_coeffs, 1);
+ float64x2_t p = vfmaq_laneq_f64 (d->lc0, r, odd_coeffs, 0);
+ y = vfmaq_laneq_f64 (y, r2, ln2_and_lc4, 1);
+ y = vfmaq_f64 (p, r2, y);
+ return vfmaq_f64 (hi, y, r2);
}
/* Double-precision implementation of vector asinh(x).
@@ -106,23 +143,24 @@ log_inline (float64x2_t x, const struct data *d)
asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1
= sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
where log(x) is an optimized log approximation, and P(x) is a polynomial
- shared with the scalar routine. The greatest observed error 3.29 ULP, in
+ shared with the scalar routine. The greatest observed error 2.79 ULP, in
|x| >= 1:
- __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
- want 0x1.ffffcfd0e2352p-1. */
+ _ZGVnN2v_asinh(0x1.2cd9d73ea76a6p+0) got 0x1.ffffd003219dap-1
+ want 0x1.ffffd003219ddp-1. */
VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
-
float64x2_t ax = vabsq_f64 (x);
- uint64x2_t iax = vreinterpretq_u64_f64 (ax);
uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1));
- uint64x2_t special = vcgeq_u64 (iax, d->huge_bound);
#if WANT_SIMD_EXCEPT
+ uint64x2_t iax = vreinterpretq_u64_f64 (ax);
+ uint64x2_t special = vcgeq_u64 (iax, (d->huge_bound));
uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound);
special = vorrq_u64 (special, tiny);
+#else
+ uint64x2_t special = vcgeq_f64 (ax, vreinterpretq_f64_u64 (d->huge_bound));
#endif
/* Option 1: |x| >= 1.
@@ -147,19 +185,45 @@ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
overflow, and tiny lanes, which will underflow, by setting them to 0. They
will be fixed later, either by selecting x or falling back to the scalar
special-case. The largest observed error in this region is 1.47 ULPs:
- __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
- want 0x1.c1d6bf874019cp-1. */
+ _ZGVnN2v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1. */
float64x2_t option_2 = v_f64 (0);
+
if (__glibc_likely (v_any_u64 (vceqzq_u64 (gt1))))
{
+
#if WANT_SIMD_EXCEPT
ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1));
#endif
- float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2),
- z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2),
- z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8);
- float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly);
- option_2 = vfmaq_f64 (ax, p, x3);
+ float64x2_t x2 = vmulq_f64 (ax, ax), z2 = vmulq_f64 (x2, x2);
+ /* Order-17 Pairwise Horner scheme. */
+ float64x2_t c13 = vld1q_f64 (&d->c1);
+ float64x2_t c57 = vld1q_f64 (&d->c5);
+ float64x2_t c911 = vld1q_f64 (&d->c9);
+ float64x2_t c1315 = vld1q_f64 (&d->c13);
+
+ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, x2, c13, 0);
+ float64x2_t p23 = vfmaq_laneq_f64 (d->c2, x2, c13, 1);
+ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, x2, c57, 0);
+ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, x2, c57, 1);
+ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, x2, c911, 0);
+ float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, x2, c911, 1);
+ float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, x2, c1315, 0);
+ float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, x2, c1315, 1);
+ float64x2_t p1617 = vfmaq_f64 (d->c16, x2, d->c17);
+
+ float64x2_t p = vfmaq_f64 (p1415, z2, p1617);
+ p = vfmaq_f64 (p1213, z2, p);
+ p = vfmaq_f64 (p1011, z2, p);
+ p = vfmaq_f64 (p89, z2, p);
+
+ p = vfmaq_f64 (p67, z2, p);
+ p = vfmaq_f64 (p45, z2, p);
+
+ p = vfmaq_f64 (p23, z2, p);
+
+ p = vfmaq_f64 (p01, z2, p);
+ option_2 = vfmaq_f64 (ax, p, vmulq_f64 (ax, x2));
#if WANT_SIMD_EXCEPT
option_2 = vbslq_f64 (tiny, x, option_2);
#endif
@@ -167,10 +231,10 @@ VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
/* Choose the right option for each lane. */
float64x2_t y = vbslq_f64 (gt1, option_1, option_2);
- /* Copy sign. */
- y = vbslq_f64 (d->abs_mask, y, x);
-
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x, y, special);
- return y;
+ {
+ return special_case (x, y, d->abs_mask, special);
+ }
+ /* Copy sign. */
+ return vbslq_f64 (d->abs_mask, y, x);
}

221
glibc-RHEL-118273-25.patch Normal file
View File

@ -0,0 +1,221 @@
commit f86b4cf87581cf1e45702b07880679ffa0b1f47a
Author: Luna Lamb <luna.lamb@arm.com>
Date: Fri Jan 3 20:15:17 2025 +0000
AArch64: Improve codegen in SVE expm1f and users
Use unpredicated muls, use absolute compare and improve memory access.
Expm1f, sinhf and tanhf show 7%, 5% and 1% improvement in throughput
microbenchmark on Neoverse V1.
diff --git a/sysdeps/aarch64/fpu/expm1f_sve.c b/sysdeps/aarch64/fpu/expm1f_sve.c
index 7c852125cdbd0a2b..05a66400d477b819 100644
--- a/sysdeps/aarch64/fpu/expm1f_sve.c
+++ b/sysdeps/aarch64/fpu/expm1f_sve.c
@@ -18,7 +18,6 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f32.h"
/* Largest value of x for which expm1(x) should round to -1. */
#define SpecialBound 0x1.5ebc4p+6f
@@ -28,20 +27,17 @@ static const struct data
/* These 4 are grouped together so they can be loaded as one quadword, then
used with _lane forms of svmla/svmls. */
float c2, c4, ln2_hi, ln2_lo;
- float c0, c1, c3, inv_ln2, special_bound, shift;
+ float c0, inv_ln2, c1, c3, special_bound;
} data = {
/* Generated using fpminimax. */
.c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3,
.c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7,
- .c4 = 0x1.6b55a2p-10,
+ .c4 = 0x1.6b55a2p-10, .inv_ln2 = 0x1.715476p+0f,
+ .special_bound = SpecialBound, .ln2_lo = 0x1.7f7d1cp-20f,
+ .ln2_hi = 0x1.62e4p-1f,
- .special_bound = SpecialBound, .shift = 0x1.8p23f,
- .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,
- .ln2_lo = 0x1.7f7d1cp-20f,
};
-#define C(i) sv_f32 (d->c##i)
-
static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg)
{
@@ -71,9 +67,8 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
where 2^i is exact because i is an integer. */
- svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
- j = svsub_x (pg, j, d->shift);
- svint32_t i = svcvt_s32_x (pg, j);
+ svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2);
+ j = svrinta_x (pg, j);
svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
f = svmls_lane (f, j, lane_constants, 3);
@@ -83,17 +78,17 @@ svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
x + ax^2 + bx^3 + cx^4 ....
So we calculate the polynomial P(f) = a + bf + cf^2 + ...
and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
- svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
- svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
- svfloat32_t f2 = svmul_x (pg, f, f);
+ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0);
+ svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1);
+ svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f);
svfloat32_t p = svmla_x (pg, p12, f2, p34);
- p = svmla_x (pg, C (0), f, p);
+
+ p = svmla_x (pg, sv_f32 (d->c0), f, p);
p = svmla_x (pg, f, f2, p);
/* Assemble the result.
expm1(x) ~= 2^i * (p + 1) - 1
Let t = 2^i. */
- svfloat32_t t = svreinterpret_f32 (
- svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000));
- return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+ svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j));
+ return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
}
diff --git a/sysdeps/aarch64/fpu/sinhf_sve.c b/sysdeps/aarch64/fpu/sinhf_sve.c
index 6c204b57a2aa18d3..50dd386774b005ca 100644
--- a/sysdeps/aarch64/fpu/sinhf_sve.c
+++ b/sysdeps/aarch64/fpu/sinhf_sve.c
@@ -63,5 +63,5 @@ svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
if (__glibc_unlikely (svptest_any (pg, special)))
return special_case (x, svmul_x (pg, t, halfsign), special);
- return svmul_x (pg, t, halfsign);
+ return svmul_x (svptrue_b32 (), t, halfsign);
}
diff --git a/sysdeps/aarch64/fpu/sv_expm1f_inline.h b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
index 5b7245122294e1b4..e46ddda5437dc826 100644
--- a/sysdeps/aarch64/fpu/sv_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
@@ -27,21 +27,18 @@ struct sv_expm1f_data
/* These 4 are grouped together so they can be loaded as one quadword, then
used with _lane forms of svmla/svmls. */
float32_t c2, c4, ln2_hi, ln2_lo;
- float32_t c0, c1, c3, inv_ln2, shift;
+ float c0, inv_ln2, c1, c3, special_bound;
};
/* Coefficients generated using fpminimax. */
#define SV_EXPM1F_DATA \
{ \
- .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \
- .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \
+ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .inv_ln2 = 0x1.715476p+0f, \
+ .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7, \
\
- .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
- .ln2_lo = 0x1.7f7d1cp-20f, \
+ .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f, \
}
-#define C(i) sv_f32 (d->c##i)
-
static inline svfloat32_t
expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
{
@@ -55,9 +52,8 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
where 2^i is exact because i is an integer. */
- svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
- j = svsub_x (pg, j, d->shift);
- svint32_t i = svcvt_s32_x (pg, j);
+ svfloat32_t j = svmul_x (svptrue_b32 (), x, d->inv_ln2);
+ j = svrinta_x (pg, j);
svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
f = svmls_lane (f, j, lane_constants, 3);
@@ -67,18 +63,18 @@ expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
x + ax^2 + bx^3 + cx^4 ....
So we calculate the polynomial P(f) = a + bf + cf^2 + ...
and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
- svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
- svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
- svfloat32_t f2 = svmul_x (pg, f, f);
+ svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), f, lane_constants, 0);
+ svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), f, lane_constants, 1);
+ svfloat32_t f2 = svmul_x (svptrue_b32 (), f, f);
svfloat32_t p = svmla_x (pg, p12, f2, p34);
- p = svmla_x (pg, C (0), f, p);
+ p = svmla_x (pg, sv_f32 (d->c0), f, p);
p = svmla_x (pg, f, f2, p);
/* Assemble the result.
expm1(x) ~= 2^i * (p + 1) - 1
Let t = 2^i. */
- svfloat32_t t = svscale_x (pg, sv_f32 (1), i);
- return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+ svfloat32_t t = svscale_x (pg, sv_f32 (1.0f), svcvt_s32_x (pg, j));
+ return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
}
#endif
diff --git a/sysdeps/aarch64/fpu/tanhf_sve.c b/sysdeps/aarch64/fpu/tanhf_sve.c
index 0b94523cf5074200..80dd679346f13f37 100644
--- a/sysdeps/aarch64/fpu/tanhf_sve.c
+++ b/sysdeps/aarch64/fpu/tanhf_sve.c
@@ -19,20 +19,27 @@
#include "sv_expm1f_inline.h"
+/* Largest value of x for which tanhf(x) rounds to 1 (or -1 for negative). */
+#define BoringBound 0x1.205966p+3f
+
static const struct data
{
struct sv_expm1f_data expm1f_consts;
- uint32_t boring_bound, onef;
+ uint32_t onef, special_bound;
+ float boring_bound;
} data = {
.expm1f_consts = SV_EXPM1F_DATA,
- /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
- .boring_bound = 0x41102cb3,
.onef = 0x3f800000,
+ .special_bound = 0x7f800000,
+ .boring_bound = BoringBound,
};
static svfloat32_t NOINLINE
-special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+special_case (svfloat32_t x, svbool_t pg, svbool_t is_boring,
+ svfloat32_t boring, svfloat32_t q, svbool_t special)
{
+ svfloat32_t y
+ = svsel_f32 (is_boring, boring, svdiv_x (pg, q, svadd_x (pg, q, 2.0)));
return sv_call_f32 (tanhf, x, y, special);
}
@@ -47,15 +54,16 @@ svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
svfloat32_t ax = svabs_x (pg, x);
svuint32_t iax = svreinterpret_u32 (ax);
svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
- svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound);
svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef));
-
- svbool_t special = svcmpgt (pg, iax, 0x7f800000);
+ svbool_t special = svcmpgt (pg, iax, d->special_bound);
+ svbool_t is_boring = svacgt (pg, x, d->boring_bound);
/* tanh(x) = (e^2x - 1) / (e^2x + 1). */
- svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts);
- svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
+ svfloat32_t q = expm1f_inline (svmul_x (svptrue_b32 (), x, 2.0), pg,
+ &d->expm1f_consts);
+
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svsel_f32 (is_boring, boring, y), special);
+ return special_case (x, pg, is_boring, boring, q, special);
+ svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
return svsel_f32 (is_boring, boring, y);
}

125
glibc-RHEL-118273-26.patch Normal file
View File

@ -0,0 +1,125 @@
commit d3f2b71ef1d146137a25dd1367d97a14fac341c6
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Tue Nov 26 11:38:30 2024 +0000
aarch64: Fix tests not compatible with targets supporting GCS
- Add GCS marking to some of the tests when target supports GCS
- Fix tst-ro-dynamic-mod.map linker script to avoid removing
GNU properties
- Add header with macros for GNU properties
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/elf/tst-asm-helper.h b/elf/tst-asm-helper.h
new file mode 100644
index 0000000000000000..6f91ac2ddc54d3f9
--- /dev/null
+++ b/elf/tst-asm-helper.h
@@ -0,0 +1,49 @@
+/* Test header that defines macros for GNU properties that need to be
+ used in some test assembly files where sysdep.h cannot be included
+ for some reason.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */
+#define FEATURE_1_AND 0xc0000000
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+#define GNU_PROPERTY(type, value) \
+ .section .note.gnu.property, "a"; \
+ .p2align 3; \
+ .word 4; \
+ .word 16; \
+ .word 5; \
+ .asciz "GNU"; \
+ .word type; \
+ .word 4; \
+ .word value; \
+ .word 0; \
+ .text
+
+/* Add GNU property note with the supported features to all asm code
+ where this header is included. */
+#if HAVE_AARCH64_BTI && HAVE_AARCH64_PAC_RET
+GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC|FEATURE_1_GCS)
+#elif HAVE_AARCH64_BTI
+GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_GCS)
+#endif
diff --git a/elf/tst-big-note-lib.S b/elf/tst-big-note-lib.S
index 5eb1e03cfbe2cee8..cfd31137e85a1335 100644
--- a/elf/tst-big-note-lib.S
+++ b/elf/tst-big-note-lib.S
@@ -20,6 +20,8 @@
On a typical Linux system with 8MiB "ulimit -s", that was enough
to trigger stack overflow in open_verify. */
+#include "tst-asm-helper.h"
+
#define NOTE_SIZE 8*1024*1024
.pushsection .note.big,"a"
diff --git a/elf/tst-ro-dynamic-mod.map b/elf/tst-ro-dynamic-mod.map
index 2fe4a2998cddd587..2a158480c07d9691 100644
--- a/elf/tst-ro-dynamic-mod.map
+++ b/elf/tst-ro-dynamic-mod.map
@@ -3,14 +3,13 @@ SECTIONS
. = SIZEOF_HEADERS;
.dynamic : { *(.dynamic) } :text :dynamic
.rodata : { *(.data*) *(.bss*) } :text
- /DISCARD/ : {
- *(.note.gnu.property)
- }
- .note : { *(.note.*) } :text :note
+ .note : { *(.note) } :text :note
+ .note.gnu.property : { *(.note.gnu.property) } :text :gnu_property
}
PHDRS
{
text PT_LOAD FLAGS(5) FILEHDR PHDRS;
dynamic PT_DYNAMIC FLAGS(4);
note PT_NOTE FLAGS(4);
+ gnu_property PT_GNU_PROPERTY FLAGS(4);
}
diff --git a/sysdeps/aarch64/tst-vpcs-mod.S b/sysdeps/aarch64/tst-vpcs-mod.S
index 19b01c3c3859e13b..b3b5824eda1fb076 100644
--- a/sysdeps/aarch64/tst-vpcs-mod.S
+++ b/sysdeps/aarch64/tst-vpcs-mod.S
@@ -17,6 +17,8 @@
License along with the GNU C Library. If not, see
<https://www.gnu.org/licenses/>. */
+#include "tst-asm-helper.h"
+
.variant_pcs vpcs_call
.global vpcs_call
.type vpcs_call, %function
@@ -121,7 +123,7 @@ vpcs_call_regs:
/* Emulate a BL using B, but save x30 before the branch. */
adr x30, .L_return_addr
stp x30, x29, [x1, 240]
- b vpcs_call
+ bl vpcs_call
.L_return_addr:
/* Restore callee-saved registers. */

241
glibc-RHEL-118273-27.patch Normal file
View File

@ -0,0 +1,241 @@
commit 95e807209b680257a9afe81a507754f1565dbb4d
Author: Yat Long Poon <yatlong.poon@arm.com>
Date: Thu Feb 13 18:03:04 2025 +0000
AArch64: Improve codegen for SVE powf
Improve memory access with indexed/unpredicated instructions.
Eliminate register spills. Speedup on Neoverse V1: 3%.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c
index 4f6a142325ae719b..08d7019a1855ff3c 100644
--- a/sysdeps/aarch64/fpu/powf_sve.c
+++ b/sysdeps/aarch64/fpu/powf_sve.c
@@ -26,7 +26,6 @@
#define Tlogc __v_powf_data.logc
#define Texp __v_powf_data.scale
#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))
-#define Shift 0x1.8p52
#define Norm 0x1p23f /* 0x4b000000. */
/* Overall ULP error bound for pow is 2.6 ulp
@@ -36,7 +35,7 @@ static const struct data
double log_poly[4];
double exp_poly[3];
float uflow_bound, oflow_bound, small_bound;
- uint32_t sign_bias, sign_mask, subnormal_bias, off;
+ uint32_t sign_bias, subnormal_bias, off;
} data = {
/* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of
V_POWF_EXP2_N. */
@@ -53,7 +52,6 @@ static const struct data
.small_bound = 0x1p-126f,
.off = 0x3f35d000,
.sign_bias = SignBias,
- .sign_mask = 0x80000000,
.subnormal_bias = 0x0b800000, /* 23 << 23. */
};
@@ -86,7 +84,7 @@ svisodd (svbool_t pg, svfloat32_t x)
static inline svbool_t
sv_zeroinfnan (svbool_t pg, svuint32_t i)
{
- return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1),
+ return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
2u * 0x7f800000 - 1);
}
@@ -150,9 +148,14 @@ powf_specialcase (float x, float y, float z)
}
/* Scalar fallback for special case routines with custom signature. */
-static inline svfloat32_t
-sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
+static svfloat32_t NOINLINE
+sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y)
{
+ /* Special cases of x or y: zero, inf and nan. */
+ svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1));
+ svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2));
+ svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial);
+
svbool_t p = svpfirst (cmp, svpfalse ());
while (svptest_any (cmp, p))
{
@@ -182,30 +185,30 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
/* Polynomial to approximate log1p(r)/ln2. */
svfloat64_t logx = A (0);
- logx = svmla_x (pg, A (1), r, logx);
- logx = svmla_x (pg, A (2), r, logx);
- logx = svmla_x (pg, A (3), r, logx);
- logx = svmla_x (pg, y0, r, logx);
+ logx = svmad_x (pg, r, logx, A (1));
+ logx = svmad_x (pg, r, logx, A (2));
+ logx = svmad_x (pg, r, logx, A (3));
+ logx = svmad_x (pg, r, logx, y0);
*pylogx = svmul_x (pg, y, logx);
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
- svfloat64_t kd = svadd_x (pg, *pylogx, Shift);
- svuint64_t ki = svreinterpret_u64 (kd);
- kd = svsub_x (pg, kd, Shift);
+ svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx);
+ svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd));
r = svsub_x (pg, *pylogx, kd);
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */
- svuint64_t t
- = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1));
- svuint64_t ski = svadd_x (pg, ki, sign_bias);
- t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS));
+ svuint64_t t = svld1_gather_index (
+ svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1));
+ svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias);
+ t = svadd_x (svptrue_b64 (), t,
+ svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS));
svfloat64_t s = svreinterpret_f64 (t);
svfloat64_t p = C (0);
p = svmla_x (pg, C (1), p, r);
p = svmla_x (pg, C (2), p, r);
- p = svmla_x (pg, s, p, svmul_x (pg, s, r));
+ p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r));
return p;
}
@@ -219,19 +222,16 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
{
const svbool_t ptrue = svptrue_b64 ();
- /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two in
- order to perform core computation in double precision. */
+ /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
+ * in order to perform core computation in double precision. */
const svbool_t pg_lo = svunpklo (pg);
const svbool_t pg_hi = svunpkhi (pg);
- svfloat64_t y_lo = svcvt_f64_x (
- ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
- svfloat64_t y_hi = svcvt_f64_x (
- ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
- svfloat32_t z = svreinterpret_f32 (iz);
- svfloat64_t z_lo = svcvt_f64_x (
- ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z))));
- svfloat64_t z_hi = svcvt_f64_x (
- ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z))));
+ svfloat64_t y_lo
+ = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
+ svfloat64_t y_hi
+ = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
+ svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
+ svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
svuint64_t i_lo = svunpklo (i);
svuint64_t i_hi = svunpkhi (i);
svint64_t k_lo = svunpklo (k);
@@ -258,9 +258,9 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
/* Implementation of SVE powf.
Provides the same accuracy as AdvSIMD powf, since it relies on the same
algorithm. The theoretical maximum error is under 2.60 ULPs.
- Maximum measured error is 2.56 ULPs:
- SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127
- want 0x1.fd4b06p+127. */
+ Maximum measured error is 2.57 ULPs:
+ SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
+ want 0x1.fff862p+127. */
svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -269,21 +269,19 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
svuint32_t viy0 = svreinterpret_u32 (y);
/* Negative x cases. */
- svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask);
- svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask);
+ svbool_t xisneg = svcmplt (pg, x, sv_f32 (0));
/* Set sign_bias and ix depending on sign of x and nature of y. */
- svbool_t yisnotint_xisneg = svpfalse_b ();
+ svbool_t yint_or_xpos = pg;
svuint32_t sign_bias = sv_u32 (0);
svuint32_t vix = vix0;
if (__glibc_unlikely (svptest_any (pg, xisneg)))
{
/* Determine nature of y. */
- yisnotint_xisneg = svisnotint (xisneg, y);
- svbool_t yisint_xisneg = svisint (xisneg, y);
+ yint_or_xpos = svisint (xisneg, y);
svbool_t yisodd_xisneg = svisodd (xisneg, y);
/* ix set to abs(ix) if y is integer. */
- vix = svand_m (yisint_xisneg, vix0, 0x7fffffff);
+ vix = svand_m (yint_or_xpos, vix0, 0x7fffffff);
/* Set to SignBias if x is negative and y is odd. */
sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
}
@@ -294,8 +292,8 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
svbool_t cmp = svorr_z (pg, xspecial, yspecial);
/* Small cases of x: |x| < 0x1p-126. */
- svbool_t xsmall = svaclt (pg, x, d->small_bound);
- if (__glibc_unlikely (svptest_any (pg, xsmall)))
+ svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound);
+ if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall)))
{
/* Normalize subnormal x so exponent becomes negative. */
svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
@@ -304,32 +302,35 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
vix = svsel (xsmall, vix_norm, vix);
}
/* Part of core computation carried in working precision. */
- svuint32_t tmp = svsub_x (pg, vix, d->off);
- svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
- V_POWF_LOG2_N - 1);
- svuint32_t top = svand_x (pg, tmp, 0xff800000);
- svuint32_t iz = svsub_x (pg, vix, top);
- svint32_t k
- = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS));
-
- /* Compute core in extended precision and return intermediate ylogx results to
- handle cases of underflow and underflow in exp. */
+ svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off);
+ svuint32_t i = svand_x (
+ yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
+ V_POWF_LOG2_N - 1);
+ svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000);
+ svuint32_t iz = svsub_x (yint_or_xpos, vix, top);
+ svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top),
+ (23 - V_POWF_EXP2_TABLE_BITS));
+
+ /* Compute core in extended precision and return intermediate ylogx results
+ * to handle cases of underflow and overflow in exp. */
svfloat32_t ylogx;
- svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d);
+ svfloat32_t ret
+ = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
/* Handle exp special cases of underflow and overflow. */
- svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
+ svuint32_t sign
+ = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
svfloat32_t ret_oflow
- = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY)));
+ = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY)));
svfloat32_t ret_uflow = svreinterpret_f32 (sign);
- ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret);
- ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret);
+ ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret);
+ ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret);
/* Cases of finite y and finite negative x. */
- ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret);
+ ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf ("")));
- if (__glibc_unlikely (svptest_any (pg, cmp)))
- return sv_call_powf_sc (x, y, ret, cmp);
+ if (__glibc_unlikely (svptest_any (cmp, cmp)))
+ return sv_call_powf_sc (x, y, ret);
return ret;
}

401
glibc-RHEL-118273-28.patch Normal file
View File

@ -0,0 +1,401 @@
commit 0b195651db3ae793187c7dd6d78b5a7a8da9d5e6
Author: Yat Long Poon <yatlong.poon@arm.com>
Date: Thu Feb 13 18:02:01 2025 +0000
AArch64: Improve codegen for SVE pow
Move constants to struct. Improve memory access with indexed/unpredicated
instructions. Eliminate register spills. Speedup on Neoverse V1: 24%.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c
index 4c0bf8956c584be7..4242d22a491ed17e 100644
--- a/sysdeps/aarch64/fpu/pow_sve.c
+++ b/sysdeps/aarch64/fpu/pow_sve.c
@@ -44,19 +44,18 @@
/* Data is defined in v_pow_log_data.c. */
#define N_LOG (1 << V_POW_LOG_TABLE_BITS)
-#define A __v_pow_log_data.poly
#define Off 0x3fe6955500000000
/* Data is defined in v_pow_exp_data.c. */
#define N_EXP (1 << V_POW_EXP_TABLE_BITS)
#define SignBias (0x800 << V_POW_EXP_TABLE_BITS)
-#define C __v_pow_exp_data.poly
#define SmallExp 0x3c9 /* top12(0x1p-54). */
#define BigExp 0x408 /* top12(512.). */
#define ThresExp 0x03f /* BigExp - SmallExp. */
#define HugeExp 0x409 /* top12(1024.). */
/* Constants associated with pow. */
+#define SmallBoundX 0x1p-126
#define SmallPowX 0x001 /* top12(0x1p-126). */
#define BigPowX 0x7ff /* top12(INFINITY). */
#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */
@@ -64,6 +63,31 @@
#define BigPowY 0x43e /* top12(0x1.749p62). */
#define ThresPowY 0x080 /* BigPowY - SmallPowY. */
+static const struct data
+{
+ double log_c0, log_c2, log_c4, log_c6, ln2_hi, ln2_lo;
+ double log_c1, log_c3, log_c5, off;
+ double n_over_ln2, exp_c2, ln2_over_n_hi, ln2_over_n_lo;
+ double exp_c0, exp_c1;
+} data = {
+ .log_c0 = -0x1p-1,
+ .log_c1 = -0x1.555555555556p-1,
+ .log_c2 = 0x1.0000000000006p-1,
+ .log_c3 = 0x1.999999959554ep-1,
+ .log_c4 = -0x1.555555529a47ap-1,
+ .log_c5 = -0x1.2495b9b4845e9p0,
+ .log_c6 = 0x1.0002b8b263fc3p0,
+ .off = Off,
+ .exp_c0 = 0x1.fffffffffffd4p-2,
+ .exp_c1 = 0x1.5555571d6ef9p-3,
+ .exp_c2 = 0x1.5555576a5adcep-5,
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ .n_over_ln2 = 0x1.71547652b82fep0 * N_EXP,
+ .ln2_over_n_hi = 0x1.62e42fefc0000p-9,
+ .ln2_over_n_lo = -0x1.c610ca86c3899p-45,
+};
+
/* Check if x is an integer. */
static inline svbool_t
sv_isint (svbool_t pg, svfloat64_t x)
@@ -82,7 +106,7 @@ sv_isnotint (svbool_t pg, svfloat64_t x)
static inline svbool_t
sv_isodd (svbool_t pg, svfloat64_t x)
{
- svfloat64_t y = svmul_x (pg, x, 0.5);
+ svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);
return sv_isnotint (pg, y);
}
@@ -121,7 +145,7 @@ zeroinfnan (uint64_t i)
static inline svbool_t
sv_zeroinfnan (svbool_t pg, svuint64_t i)
{
- return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1),
+ return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
2 * asuint64 (INFINITY) - 1);
}
@@ -174,16 +198,17 @@ sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2,
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */
static inline svfloat64_t
-sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
+sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
+ const struct data *d)
{
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
- svuint64_t tmp = svsub_x (pg, ix, Off);
+ svuint64_t tmp = svsub_x (pg, ix, d->off);
svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS),
sv_u64 (N_LOG - 1));
svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
- svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52)));
+ svuint64_t iz = svsub_x (pg, ix, svlsl_x (pg, svreinterpret_u64 (k), 52));
svfloat64_t z = svreinterpret_f64 (iz);
svfloat64_t kd = svcvt_f64_x (pg, k);
@@ -199,40 +224,85 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
svfloat64_t r = svmad_x (pg, z, invc, -1.0);
/* k*Ln2 + log(c) + r. */
- svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi);
+
+ svfloat64_t ln2_hilo = svld1rq_f64 (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t t1 = svmla_lane_f64 (logc, kd, ln2_hilo, 0);
svfloat64_t t2 = svadd_x (pg, t1, r);
- svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo);
+ svfloat64_t lo1 = svmla_lane_f64 (logctail, kd, ln2_hilo, 1);
svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r);
/* Evaluation is optimized assuming superscalar pipelined execution. */
- svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */
- svfloat64_t ar2 = svmul_x (pg, r, ar);
- svfloat64_t ar3 = svmul_x (pg, r, ar2);
+
+ svfloat64_t log_c02 = svld1rq_f64 (svptrue_b64 (), &d->log_c0);
+ svfloat64_t ar = svmul_lane_f64 (r, log_c02, 0);
+ svfloat64_t ar2 = svmul_x (svptrue_b64 (), r, ar);
+ svfloat64_t ar3 = svmul_x (svptrue_b64 (), r, ar2);
/* k*Ln2 + log(c) + r + A[0]*r*r. */
svfloat64_t hi = svadd_x (pg, t2, ar2);
- svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r);
+ svfloat64_t lo3 = svmls_x (pg, ar2, ar, r);
svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2);
/* p = log1p(r) - r - A[0]*r*r. */
/* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r *
A[6])))). */
- svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]);
- svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]);
- svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]);
+
+ svfloat64_t log_c46 = svld1rq_f64 (svptrue_b64 (), &d->log_c4);
+ svfloat64_t a56 = svmla_lane_f64 (sv_f64 (d->log_c5), r, log_c46, 1);
+ svfloat64_t a34 = svmla_lane_f64 (sv_f64 (d->log_c3), r, log_c46, 0);
+ svfloat64_t a12 = svmla_lane_f64 (sv_f64 (d->log_c1), r, log_c02, 1);
svfloat64_t p = svmla_x (pg, a34, ar2, a56);
p = svmla_x (pg, a12, ar2, p);
- p = svmul_x (pg, ar3, p);
+ p = svmul_x (svptrue_b64 (), ar3, p);
svfloat64_t lo = svadd_x (
- pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);
+ pg, svadd_x (pg, svsub_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);
svfloat64_t y = svadd_x (pg, hi, lo);
*tail = svadd_x (pg, svsub_x (pg, hi, y), lo);
return y;
}
+static inline svfloat64_t
+sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
+ svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,
+ svuint64_t *ki, const struct data *d)
+{
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ svfloat64_t n_over_ln2_and_c2 = svld1rq_f64 (svptrue_b64 (), &d->n_over_ln2);
+ svfloat64_t z = svmul_lane_f64 (x, n_over_ln2_and_c2, 0);
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ svfloat64_t kd = svrinta_x (pg, z);
+ *ki = svreinterpret_u64 (svcvt_s64_x (pg, kd));
+
+ svfloat64_t ln2_over_n_hilo
+ = svld1rq_f64 (svptrue_b64 (), &d->ln2_over_n_hi);
+ svfloat64_t r = x;
+ r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 0);
+ r = svmls_lane_f64 (r, kd, ln2_over_n_hilo, 1);
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r = svadd_x (pg, r, xtail);
+ /* 2^(k/N) ~= scale. */
+ svuint64_t idx = svand_x (pg, *ki, N_EXP - 1);
+ svuint64_t top
+ = svlsl_x (pg, svadd_x (pg, *ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ *sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);
+ *sbits = svadd_x (pg, *sbits, top);
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ *tmp = svmla_lane_f64 (sv_f64 (d->exp_c1), r, n_over_ln2_and_c2, 1);
+ *tmp = svmla_x (pg, sv_f64 (d->exp_c0), r, *tmp);
+ *tmp = svmla_x (pg, r, r2, *tmp);
+ svfloat64_t scale = svreinterpret_f64 (*sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ z = svmla_x (pg, scale, scale, *tmp);
+ return z;
+}
+
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */
static inline svfloat64_t
sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
- svuint64_t sign_bias)
+ svuint64_t sign_bias, const struct data *d)
{
/* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)
and other cases of large values of x (scale * (1 + TMP) oflow). */
@@ -240,73 +310,46 @@ sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
/* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */
svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);
- /* Conditions special, uflow and oflow are all expressed as uoflow &&
- something, hence do not bother computing anything if no lane in uoflow is
- true. */
- svbool_t special = svpfalse_b ();
- svbool_t uflow = svpfalse_b ();
- svbool_t oflow = svpfalse_b ();
+ svfloat64_t tmp;
+ svuint64_t sbits, ki;
if (__glibc_unlikely (svptest_any (pg, uoflow)))
{
+ svfloat64_t z
+ = sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);
+
/* |x| is tiny (|x| <= 0x1p-54). */
- uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);
+ svbool_t uflow
+ = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);
uflow = svand_z (pg, uoflow, uflow);
/* |x| is huge (|x| >= 1024). */
- oflow = svcmpge (pg, abstop, HugeExp);
+ svbool_t oflow = svcmpge (pg, abstop, HugeExp);
oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));
+
/* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow
- or underflow. */
- special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
+ or underflow. */
+ svbool_t special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
+
+ /* Update result with special and large cases. */
+ z = sv_call_specialcase (tmp, sbits, ki, z, special);
+
+ /* Handle underflow and overflow. */
+ svbool_t x_is_neg = svcmplt (pg, x, 0);
+ svuint64_t sign_mask
+ = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
+ svfloat64_t res_uoflow
+ = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
+ res_uoflow = svreinterpret_f64 (
+ svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
+ /* Avoid spurious underflow for tiny x. */
+ svfloat64_t res_spurious_uflow
+ = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
+
+ z = svsel (oflow, res_uoflow, z);
+ z = svsel (uflow, res_spurious_uflow, z);
+ return z;
}
- /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
- /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
- svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2);
- /* z - kd is in [-1, 1] in non-nearest rounding modes. */
- svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift);
- svfloat64_t kd = svadd_x (pg, z, shift);
- svuint64_t ki = svreinterpret_u64 (kd);
- kd = svsub_x (pg, kd, shift);
- svfloat64_t r = x;
- r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi);
- r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo);
- /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
- r = svadd_x (pg, r, xtail);
- /* 2^(k/N) ~= scale. */
- svuint64_t idx = svand_x (pg, ki, N_EXP - 1);
- svuint64_t top
- = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);
- /* This is only a valid scale when -1023*N < k < 1024*N. */
- svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);
- sbits = svadd_x (pg, sbits, top);
- /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
- svfloat64_t r2 = svmul_x (pg, r, r);
- svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]);
- tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp);
- tmp = svmla_x (pg, r, r2, tmp);
- svfloat64_t scale = svreinterpret_f64 (sbits);
- /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
- is no spurious underflow here even without fma. */
- z = svmla_x (pg, scale, scale, tmp);
-
- /* Update result with special and large cases. */
- if (__glibc_unlikely (svptest_any (pg, special)))
- z = sv_call_specialcase (tmp, sbits, ki, z, special);
-
- /* Handle underflow and overflow. */
- svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63);
- svbool_t x_is_neg = svcmpne (pg, sign_bit, 0);
- svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
- svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
- res_uoflow = svreinterpret_f64 (
- svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
- z = svsel (oflow, res_uoflow, z);
- /* Avoid spurious underflow for tiny x. */
- svfloat64_t res_spurious_uflow
- = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
- z = svsel (uflow, res_spurious_uflow, z);
-
- return z;
+ return sv_exp_core (pg, x, xtail, sign_bias, &tmp, &sbits, &ki, d);
}
static inline double
@@ -341,47 +384,39 @@ pow_sc (double x, double y)
svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
{
+ const struct data *d = ptr_barrier (&data);
+
/* This preamble handles special case conditions used in the final scalar
fallbacks. It also updates ix and sign_bias, that are used in the core
computation too, i.e., exp( y * log (x) ). */
svuint64_t vix0 = svreinterpret_u64 (x);
svuint64_t viy0 = svreinterpret_u64 (y);
- svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52);
/* Negative x cases. */
- svuint64_t sign_bit = svlsr_m (pg, vix0, 63);
- svbool_t xisneg = svcmpeq (pg, sign_bit, 1);
+ svbool_t xisneg = svcmplt (pg, x, 0);
/* Set sign_bias and ix depending on sign of x and nature of y. */
- svbool_t yisnotint_xisneg = svpfalse_b ();
+ svbool_t yint_or_xpos = pg;
svuint64_t sign_bias = sv_u64 (0);
svuint64_t vix = vix0;
- svuint64_t vtopx1 = vtopx0;
if (__glibc_unlikely (svptest_any (pg, xisneg)))
{
/* Determine nature of y. */
- yisnotint_xisneg = sv_isnotint (xisneg, y);
- svbool_t yisint_xisneg = sv_isint (xisneg, y);
+ yint_or_xpos = sv_isint (xisneg, y);
svbool_t yisodd_xisneg = sv_isodd (xisneg, y);
/* ix set to abs(ix) if y is integer. */
- vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff);
- vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff);
+ vix = svand_m (yint_or_xpos, vix0, 0x7fffffffffffffff);
/* Set to SignBias if x is negative and y is odd. */
sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0));
}
- /* Special cases of x or y: zero, inf and nan. */
- svbool_t xspecial = sv_zeroinfnan (pg, vix0);
- svbool_t yspecial = sv_zeroinfnan (pg, viy0);
- svbool_t special = svorr_z (pg, xspecial, yspecial);
-
/* Small cases of x: |x| < 0x1p-126. */
- svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff);
- svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX);
- if (__glibc_unlikely (svptest_any (pg, xsmall)))
+ svbool_t xsmall = svaclt (yint_or_xpos, x, SmallBoundX);
+ if (__glibc_unlikely (svptest_any (yint_or_xpos, xsmall)))
{
/* Normalize subnormal x so exponent becomes negative. */
- svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0);
+ svuint64_t vtopx = svlsr_x (svptrue_b64 (), vix, 52);
+ svbool_t topx_is_null = svcmpeq (xsmall, vtopx, 0);
svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52));
vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff);
@@ -391,20 +426,24 @@ svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
/* y_hi = log(ix, &y_lo). */
svfloat64_t vlo;
- svfloat64_t vhi = sv_log_inline (pg, vix, &vlo);
+ svfloat64_t vhi = sv_log_inline (yint_or_xpos, vix, &vlo, d);
/* z = exp(y_hi, y_lo, sign_bias). */
- svfloat64_t vehi = svmul_x (pg, y, vhi);
- svfloat64_t velo = svmul_x (pg, y, vlo);
- svfloat64_t vemi = svmls_x (pg, vehi, y, vhi);
- velo = svsub_x (pg, velo, vemi);
- svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias);
+ svfloat64_t vehi = svmul_x (svptrue_b64 (), y, vhi);
+ svfloat64_t vemi = svmls_x (yint_or_xpos, vehi, y, vhi);
+ svfloat64_t velo = svnmls_x (yint_or_xpos, vemi, y, vlo);
+ svfloat64_t vz = sv_exp_inline (yint_or_xpos, vehi, velo, sign_bias, d);
/* Cases of finite y and finite negative x. */
- vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz);
+ vz = svsel (yint_or_xpos, vz, sv_f64 (__builtin_nan ("")));
+
+ /* Special cases of x or y: zero, inf and nan. */
+ svbool_t xspecial = sv_zeroinfnan (svptrue_b64 (), vix0);
+ svbool_t yspecial = sv_zeroinfnan (svptrue_b64 (), viy0);
+ svbool_t special = svorr_z (svptrue_b64 (), xspecial, yspecial);
/* Cases of zero/inf/nan x or y. */
- if (__glibc_unlikely (svptest_any (pg, special)))
+ if (__glibc_unlikely (svptest_any (svptrue_b64 (), special)))
vz = sv_call2_f64 (pow_sc, x, y, vz, special);
return vz;

View File

@ -0,0 +1,45 @@
commit f5ff34cb3c75ec1061c75bb9188b3c1176426947
Author: Yat Long Poon <yatlong.poon@arm.com>
Date: Thu Feb 13 18:00:50 2025 +0000
AArch64: Improve codegen for SVE erfcf
Reduce number of MOV/MOVPRFXs and use unpredicated FMUL.
Replace MUL with LSL. Speedup on Neoverse V1: 6%.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c
index ecacb933aca40855..e4869263e31e18bc 100644
--- a/sysdeps/aarch64/fpu/erfcf_sve.c
+++ b/sysdeps/aarch64/fpu/erfcf_sve.c
@@ -76,7 +76,7 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx);
/* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */
- i = svmul_x (pg, i, 2);
+ i = svlsl_x (svptrue_b32 (), i, 1);
const float32_t *p = &__v_erfcf_data.tab[0].erfc - 2 * dat->off_arr;
svfloat32_t erfcr = svld1_gather_index (pg, p, i);
svfloat32_t scale = svld1_gather_index (pg, p + 1, i);
@@ -84,15 +84,15 @@ svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
/* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */
svfloat32_t r = svsub_x (pg, z, shift);
svfloat32_t d = svsub_x (pg, a, r);
- svfloat32_t d2 = svmul_x (pg, d, d);
- svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t d2 = svmul_x (svptrue_b32 (), d, d);
+ svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third);
- svfloat32_t third = svdup_lane (coeffs, 0);
svfloat32_t p1 = r;
- svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1);
- svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0));
+ svfloat32_t p2 = svmls_lane (sv_f32 (dat->third), r2, coeffs, 1);
+ svfloat32_t p3
+ = svmul_x (svptrue_b32 (), r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0));
svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2);
p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4);

873
glibc-RHEL-118273-3.patch Normal file
View File

@ -0,0 +1,873 @@
commit b09fee1d21650428a6a3335408a46ebe1165d30d
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Feb 20 16:59:40 2024 +0000
aarch64/fpu: Add vector variants of acosh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 019c3a51880e2306..2e5bbb5a07f4c9b0 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -1,4 +1,5 @@
libmvec-supported-funcs = acos \
+ acosh \
asin \
atan \
atan2 \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 884b4b57f097635f..60e1cdeacec3f77e 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -79,6 +79,11 @@ libmvec {
_ZGVsMxv_tan;
}
GLIBC_2.40 {
+ _ZGVnN2v_acosh;
+ _ZGVnN2v_acoshf;
+ _ZGVnN4v_acoshf;
+ _ZGVsMxv_acosh;
+ _ZGVsMxv_acoshf;
_ZGVnN2v_cosh;
_ZGVnN2v_coshf;
_ZGVnN4v_coshf;
diff --git a/sysdeps/aarch64/fpu/acosh_advsimd.c b/sysdeps/aarch64/fpu/acosh_advsimd.c
new file mode 100644
index 0000000000000000..c88283cf1191f4eb
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acosh_advsimd.c
@@ -0,0 +1,67 @@
+/* Double-precision vector (Advanced SIMD) acosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WANT_V_LOG1P_K0_SHORTCUT 1
+#include "v_log1p_inline.h"
+
+const static struct data /* Constants read by the Advanced SIMD acosh routine in this file. */
+{
+ struct v_log1p_data log1p_consts; /* Passed on to log1p_inline. */
+ uint64x2_t one, thresh;
+} data = {
+ .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
+ .one = V2 (0x3ff0000000000000), /* asuint64(1.0). */
+ .thresh = V2 (0x1ff0000000000000) /* asuint64(0x1p511) - asuint64(1).
+ A lane is treated as special when asuint64(x) - one >= thresh, compared
+ as unsigned integers. */
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special,
+ const struct v_log1p_data *d)
+{ /* Slow path of acosh. X is the caller's original input, Y the already
+ computed argument for log1p and SPECIAL the per-lane mask of special
+ inputs. The vector result log1p_inline (y, d) is handed to v_call_f64
+ together with scalar acosh; presumably v_call_f64 substitutes acosh (x)
+ in the lanes set in SPECIAL - the helper is defined elsewhere. */
+ return v_call_f64 (acosh, x, log1p_inline (y, d), special);
+}
+
+/* Vector approximation for double-precision acosh, based on log1p.
+ The largest observed error is 3.02 ULP in the region where the
+ argument to log1p falls in the k=0 interval, i.e. x close to 1:
+ _ZGVnN2v_acosh(0x1.00798aaf80739p+0) got 0x1.f2d6d823bc9dfp-5
+ want 0x1.f2d6d823bc9e2p-5.
+
+ The result is log1p ((x - 1) + sqrt ((x + 1) * (x - 1))). Lanes are
+ flagged as special by the unsigned test asuint64 (x) - asuint64 (1)
+ >= thresh. Because the subtraction wraps, this catches every bit pattern
+ below that of 1.0 as well as those at or above 0x1p511, which includes
+ negative, infinite and NaN inputs. If any lane is flagged, the result
+ goes through special_case. */
+VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t special
+ = vcgeq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (x), d->one), d->thresh);
+ float64x2_t special_arg = x; /* Unmodified input, kept for the fallback. */
+
+#if WANT_SIMD_EXCEPT
+ if (__glibc_unlikely (v_any_u64 (special)))
+ x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x); /* Special lanes become 1.0
+ before the arithmetic below; presumably this avoids spurious floating-point
+ exceptions from those lanes. */
+#endif
+
+ float64x2_t xm1 = vsubq_f64 (x, v_f64 (1));
+ float64x2_t y;
+ y = vaddq_f64 (x, v_f64 (1));
+ y = vmulq_f64 (y, xm1); /* (x + 1) * (x - 1). */
+ y = vsqrtq_f64 (y);
+ y = vaddq_f64 (xm1, y); /* Argument for log1p. */
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (special_arg, y, special, &d->log1p_consts);
+ return log1p_inline (y, &d->log1p_consts);
+}
diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c
new file mode 100644
index 0000000000000000..3e4faaa5ca686c18
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acosh_sve.c
@@ -0,0 +1,51 @@
+/* Double-precision vector (SVE) acosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 1
+#include "sv_log1p_inline.h"
+
+#define One (0x3ff0000000000000)
+#define Thres (0x1ff0000000000000) /* asuint64 (0x1p511) - One. */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{ /* Slow path of SVE acosh. X is the original input, Y the vector result
+ already computed by the caller and SPECIAL the predicate of special lanes.
+ All three are handed to sv_call_f64 with scalar acosh; presumably that
+ helper (defined elsewhere) replaces the SPECIAL lanes of Y with
+ acosh (x). */
+ return sv_call_f64 (acosh, x, y, special);
+}
+
+/* SVE approximation for double-precision acosh, based on log1p.
+ The largest observed error is 3.19 ULP in the region where the
+ argument to log1p falls in the k=0 interval, i.e. x close to 1:
+ SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
+ want 0x1.ed23399f51373p-2.
+
+ The result is log1p ((x - 1) + sqrt ((x - 1) * (x + 1))), evaluated for
+ the lanes active in PG. */
+svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
+{
+ /* (ix - One) >= (BigBound - One).
+ Here BigBound - One is the Thres macro, i.e. asuint64 (0x1p511) - One.
+ The compare is unsigned, so the wrapping subtraction also flags every
+ bit pattern below that of 1.0, and negative, infinite and NaN inputs. */
+ svuint64_t ix = svreinterpret_u64 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
+
+ svfloat64_t xm1 = svsub_x (pg, x, 1.0);
+ svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0));
+ svfloat64_t y = svadd_x (pg, xm1, svsqrt_x (pg, u)); /* Argument for log1p. */
+
+ /* Fall back to scalar routine for special lanes. */
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, sv_log1p_inline (y, pg), special);
+ return sv_log1p_inline (y, pg);
+}
diff --git a/sysdeps/aarch64/fpu/acoshf_advsimd.c b/sysdeps/aarch64/fpu/acoshf_advsimd.c
new file mode 100644
index 0000000000000000..8916dcbf409922a9
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acoshf_advsimd.c
@@ -0,0 +1,78 @@
+/* Single-precision vector (Advanced SIMD) acosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_log1pf_inline.h"
+
+#define SquareLim 0x1p64
+
+const static struct data /* Constants read by the Advanced SIMD acoshf routine in this file. */
+{
+ struct v_log1pf_data log1pf_consts; /* Passed by value to log1pf_inline. */
+ uint32x4_t one;
+ uint16x4_t thresh; /* Compared against the narrowed high halves from vsubhn_u32. */
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .one = V4 (0x3f800000), /* asuint(1.0f). */
+ .thresh = V4 (0x2000) /* top(asuint(SquareLim) - asuint(1)).
+ asuint(0x1p64) - asuint(1) is 0x20000000; only its upper 16 bits are
+ kept. */
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint16x4_t special,
+ const struct v_log1pf_data d)
+{ /* Slow path of acoshf. X is the original input, Y the argument for log1pf
+ and SPECIAL the 16-bit-per-lane mask of special inputs. SPECIAL is
+ widened to 32-bit lanes with vmovl_u16 before being given to v_call_f32
+ together with scalar acoshf and the vector result log1pf_inline (y, d).
+ Presumably v_call_f32 substitutes acoshf (x) in the flagged lanes - the
+ helper is defined elsewhere. */
+ return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special));
+}
+
+/* Vector approximation for single-precision acosh, based on log1p. Maximum
+ error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
+ is 2.78 ULP:
+ __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
+ want 0x1.ef9ea2p-3.
+ With exceptions disabled, we can compute u with a shorter dependency chain,
+ which gives maximum error of 3.07 ULP:
+ __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
+ want 0x1.fbc7f4p-4.
+
+ The result is log1pf ((x - 1) + sqrt (u)) with u = (x - 1) * (x + 1).
+ Special lanes are found by comparing the upper 16 bits of
+ asuint (x) - asuint (1) against thresh as unsigned values. The
+ subtraction wraps, so bit patterns below that of 1.0f are flagged
+ together with those from 0x1p64 upwards. */
+
+VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (acosh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh);
+
+#if WANT_SIMD_EXCEPT
+ /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
+ only xm1 to calculate u, as operating on x will trigger invalid for NaN.
+ Widening sign-extend special predicate in order to mask with it.
+ u is formed as xm1 * xm1 + 2 * xm1, which equals (x - 1) * (x + 1). */
+ uint32x4_t p
+ = vreinterpretq_u32_s32 (vmovl_s16 (vreinterpret_s16_u16 (special)));
+ float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p);
+ float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1);
+#else
+ float32x4_t xm1 = vsubq_f32 (x, v_f32 (1));
+ float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f)));
+#endif
+
+ float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u)); /* Argument for log1pf. */
+
+ if (__glibc_unlikely (v_any_u16h (special)))
+ return special_case (x, y, special, d->log1pf_consts);
+ return log1pf_inline (y, d->log1pf_consts);
+}
+libmvec_hidden_def (V_NAME_F1 (acosh))
+HALF_WIDTH_ALIAS_F1 (acosh)
diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c
new file mode 100644
index 0000000000000000..2110894e629500be
--- /dev/null
+++ b/sysdeps/aarch64/fpu/acoshf_sve.c
@@ -0,0 +1,49 @@
+/* Single-precision vector (SVE) acosh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define One 0x3f800000
+#define Thres 0x20000000 /* asuint(0x1p64) - One. */
+
+#include "sv_log1pf_inline.h"
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{ /* Slow path of SVE acoshf. X is the original input, Y the vector result
+ already computed by the caller and SPECIAL the predicate of special lanes.
+ All three go to sv_call_f32 with scalar acoshf; presumably that helper
+ (defined elsewhere) replaces the SPECIAL lanes of Y with acoshf (x). */
+ return sv_call_f32 (acoshf, x, y, special);
+}
+
+/* Single-precision SVE acosh(x) routine. Implements the same algorithm as
+ vector acoshf and log1p.
+
+ Maximum error is 2.78 ULPs:
+ SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
+ want 0x1.f45b3cp-4.
+
+ The result is log1pf ((x - 1) + sqrt ((x - 1) * (x + 1))) for the lanes
+ active in PG. Lanes with asuint (x) - One >= Thres (unsigned compare,
+ wrapping subtraction) are special: this covers bit patterns below that
+ of 1.0f and those from 0x1p64 upwards. If any lane is special, the result
+ is passed through special_case. */
+svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
+{
+ svuint32_t ix = svreinterpret_u32 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
+
+ svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
+ svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
+ svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg); /* Computed for
+ every active lane before the special test is consulted. */
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+ return y;
+}
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index c63b2948d4938b0d..22fec4de77395e60 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
libmvec_hidden_proto (V_NAME_F1(acos));
+libmvec_hidden_proto (V_NAME_F1(acosh));
libmvec_hidden_proto (V_NAME_F1(asin));
libmvec_hidden_proto (V_NAME_F1(atan));
libmvec_hidden_proto (V_NAME_F1(cos));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 8ca55098706a54c2..841330956c102ff1 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -33,6 +33,10 @@
# define __DECL_SIMD_acos __DECL_SIMD_aarch64
# undef __DECL_SIMD_acosf
# define __DECL_SIMD_acosf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acosh
+# define __DECL_SIMD_acosh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_acoshf
+# define __DECL_SIMD_acoshf __DECL_SIMD_aarch64
# undef __DECL_SIMD_asin
# define __DECL_SIMD_asin __DECL_SIMD_aarch64
# undef __DECL_SIMD_asinf
@@ -125,6 +129,7 @@ typedef __SVBool_t __sv_bool_t;
__vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
@@ -143,6 +148,7 @@ __vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
@@ -166,6 +172,7 @@ __vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
__sv_f32_t _ZGVsMxvv_atan2f (__sv_f32_t, __sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
@@ -184,6 +191,7 @@ __sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h
new file mode 100644
index 0000000000000000..da019674f94dbac7
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h
@@ -0,0 +1,109 @@
+/* Helper for double-precision SVE routines which depend on log1p
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_SV_LOG1P_INLINE_H
+#define AARCH64_FPU_SV_LOG1P_INLINE_H
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct sv_log1p_data /* Constants read by sv_log1p_inline. */
+{
+ double poly[19], ln2[2];
+ uint64_t hf_rt2_top;
+ uint64_t one_m_hf_rt2_top;
+ uint32_t bottom_mask;
+ int64_t one_top;
+} sv_log1p_data = {
+ /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].
+ There are 19 coefficients because sv_log1p_inline evaluates
+ f + f^2 * P(f), with P passed to sv_pw_horner_18_f64_x.
+ */
+ .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+ 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+ -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+ 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+ -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+ 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+ -0x1.cfa7385bdb37ep-6 },
+ .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 }, /* High and low parts of ln(2). */
+ .hf_rt2_top = 0x3fe6a09e00000000, /* Upper 32 bits of asuint64 (sqrt(2)/2). */
+ .one_m_hf_rt2_top = 0x00095f6200000000, /* Upper 32 bits of asuint64 (1.0) minus hf_rt2_top. */
+ .bottom_mask = 0xffffffff, /* Selects the low 32 bits of the input bit pattern. */
+ .one_top = 0x3ff /* Subtracted from the shifted exponent field to obtain k. */
+};
+
+/* Inline helper returning log1p (x) = log (1 + x) for the lanes of X that are
+   active in PG.  No special cases (x <= -1, infinities, NaN) are handled
+   here; callers must filter those lanes themselves.  The including file must
+   define WANT_SV_LOG1P_K0_SHORTCUT to 0 or 1 before including this header.  */
+static inline svfloat64_t
+sv_log1p_inline (svfloat64_t x, const svbool_t pg)
+{
+  /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
+     differs from v_log1p_2u5.c by:
+     - No special-case handling - this should be dealt with by the caller.
+     - Pairwise Horner polynomial evaluation for improved accuracy.
+     - Optionally simulate the shortcut for k=0, used in the scalar routine,
+       using svsel, for improved accuracy when the argument to log1p is close
+       to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1
+       in the source of the caller before including this file.
+     See sv_log1p_2u1.c for details of the algorithm.  */
+  const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data);
+  svfloat64_t m = svadd_x (pg, x, 1);
+  svuint64_t mi = svreinterpret_u64 (m);
+  svuint64_t u = svadd_x (pg, mi, d->one_m_hf_rt2_top);
+
+  /* k is the exponent of m after biasing so that the reduced value lies in
+     [sqrt(2)/2, sqrt(2)].  */
+  svint64_t ki
+      = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, u, 52)), d->one_top);
+  svfloat64_t k = svcvt_f64_x (pg, ki);
+
+  /* Reduce x to f in [sqrt(2)/2, sqrt(2)].  */
+  svuint64_t utop
+      = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hf_rt2_top);
+  svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, d->bottom_mask));
+  svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1);
+
+  /* Correction term c/m.  c is the rounding error of m = 1 + x.  */
+  svfloat64_t c = svsub_x (pg, x, svsub_x (pg, m, 1));
+  svfloat64_t cm;
+
+#ifndef WANT_SV_LOG1P_K0_SHORTCUT
+#error \
+  "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_SV_LOG1P_K0_SHORTCUT
+  /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+     that the approximation is solely the polynomial.
+     Both halves of the shortcut are applied per lane.  svdiv_z already zeroes
+     cm in every k == 0 lane, so f must be replaced by x in exactly those
+     lanes; otherwise a k == 0 lane sharing a vector with k != 0 lanes would
+     keep f = m - 1 and lose the correction c altogether.  */
+  svbool_t knot0 = svcmpne (pg, k, 0);
+  cm = svdiv_z (knot0, c, m);
+  f = svsel (knot0, f, x);
+#else
+  /* No shortcut.  */
+  cm = svdiv_x (pg, c, m);
+#endif
+
+  /* Approximate log1p(f) on the reduced input using a polynomial.  */
+  svfloat64_t f2 = svmul_x (pg, f, f);
+  svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly);
+
+  /* Assemble log1p(x) = k * log2 + log1p(f) + c/m.  */
+  svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]);
+  svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]);
+
+  return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
new file mode 100644
index 0000000000000000..b94b2da055a6c59b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
@@ -0,0 +1,76 @@
+/* Helper for single-precision SVE routines which depend on log1p
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_SV_LOG1PF_INLINE_H
+#define AARCH64_FPU_SV_LOG1PF_INLINE_H
+
+#include "sv_math.h"
+#include "vecmath_config.h"
+#include "poly_sve_f32.h"
+
+static const struct sv_log1pf_data /* Constants read by sv_log1pf_inline. */
+{
+ float32_t poly[9];
+ float32_t ln2;
+ float32_t scale_back;
+} sv_log1pf_data = {
+ /* Polynomial generated using FPMinimax in [-0.25, 0.5].
+ eval_poly uses poly[0] and poly[1] directly and passes poly + 2 to
+ sv_pw_horner_6_f32_x. */
+ .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+ -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
+ -0x1.6f0d5ep-5f },
+ .scale_back = 0x1.0p-23f, /* 2^-23: turns an exponent-field-aligned integer into the exponent value. */
+ .ln2 = 0x1.62e43p-1f,
+};
+
+static inline svfloat32_t
+eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg)
+{ /* Evaluate the log1pf polynomial at M for the lanes active in PG. C points
+ to the coefficient array. The value assembled below is
+ m + c[0] * m^2 + c[1] * m^3 + m^4 * P (m), where P is whatever
+ sv_pw_horner_6_f32_x returns for the coefficients starting at c[2]
+ (presumably a degree-6 pairwise Horner evaluation - defined elsewhere). */
+ svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1])); /* c[0] + c[1] * m. */
+ svfloat32_t m2 = svmul_x (pg, m, m);
+ svfloat32_t q = svmla_x (pg, m, m2, p_12); /* m + m^2 * p_12. */
+ svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2);
+ p = svmul_x (pg, m2, p);
+
+ return svmla_x (pg, q, m2, p);
+}
+
+/* Inline helper returning log1pf (x) = log (1 + x) for the lanes of X that
+   are active in PG.  No special cases are handled here; callers must deal
+   with them.  The result is assembled as P (m_scale) + k * ln2, where
+   1 + x = 2^k * (1 + m_scale).  */
+static inline svfloat32_t
+sv_log1pf_inline (svfloat32_t x, svbool_t pg)
+{
+  const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
+
+  svfloat32_t m = svadd_x (pg, x, 1.0f);
+
+  /* ks holds the exponent offset of m relative to 0.75, still aligned with
+     the float exponent field (a signed multiple of 2^23).  It is negative
+     whenever m < 0.75.  */
+  svint32_t ks = svsub_x (pg, svreinterpret_s32 (m),
+			  svreinterpret_s32 (svdup_f32 (0.75f)));
+  ks = svand_x (pg, ks, 0xff800000);
+  /* Unsigned view of ks, used only for the wrapping bit-pattern arithmetic
+     below.  */
+  svuint32_t k = svreinterpret_u32 (ks);
+  /* s = 4 * 2^-k, obtained by subtracting k from the bits of 4.0f.  */
+  svfloat32_t s = svreinterpret_f32 (
+      svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k));
+
+  /* m_scale = x * 2^-k + (0.25 * s - 1) = (1 + x) * 2^-k - 1.  */
+  svfloat32_t m_scale
+      = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k));
+  m_scale
+      = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s));
+  svfloat32_t p = eval_poly (m_scale, d->poly, pg);
+  /* Convert the signed offset.  Converting the unsigned view k instead would
+     turn a negative offset into a value close to 2^32 and produce a wrong
+     k * ln2 term for inputs with 1 + x < 0.75.  For non-negative offsets both
+     conversions agree.  */
+  svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, ks), d->scale_back);
+  return svmla_x (pg, p, scale_back, d->ln2);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index b37cb7d5e9c0d96a..f4ce1d70096888aa 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -24,6 +24,7 @@
#define VEC_TYPE float64x2_t
VPCS_VECTOR_WRAPPER (acos_advsimd, _ZGVnN2v_acos)
+VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 011f07d2c15b148f..0e973cc9d7ade813 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -43,6 +43,7 @@
}
SVE_VECTOR_WRAPPER (acos_sve, _ZGVsMxv_acos)
+SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 35452991431e238a..0ce026b5ea96a064 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -24,6 +24,7 @@
#define VEC_TYPE float32x4_t
VPCS_VECTOR_WRAPPER (acosf_advsimd, _ZGVnN4v_acosf)
+VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index bbc74ede88c9e6c8..398b7373e800cd5b 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -43,6 +43,7 @@
}
SVE_VECTOR_WRAPPER (acosf_sve, _ZGVsMxv_acosf)
+SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
diff --git a/sysdeps/aarch64/fpu/v_log1p_inline.h b/sysdeps/aarch64/fpu/v_log1p_inline.h
new file mode 100644
index 0000000000000000..242e43b6eecc0b6e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_log1p_inline.h
@@ -0,0 +1,103 @@
+/* Helper for double-precision Advanced SIMD routines which depend on log1p
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_V_LOG1P_INLINE_H
+#define AARCH64_FPU_V_LOG1P_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+struct v_log1p_data
+{
+ float64x2_t poly[19], ln2[2];
+ uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask;
+ int64x2_t one_top;
+};
+
+/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */
+#define V_LOG1P_CONSTANTS_TABLE \
+ { \
+ .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \
+ V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \
+ V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \
+ V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \
+ V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \
+ V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \
+ V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \
+ V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \
+ V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \
+ V2 (-0x1.cfa7385bdb37ep-6) }, \
+ .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \
+ .hf_rt2_top = V2 (0x3fe6a09e00000000), \
+ .one_m_hf_rt2_top = V2 (0x00095f6200000000), \
+ .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \
+ }
+
+#define BottomMask v_u64 (0xffffffff)
+
+static inline float64x2_t
+log1p_inline (float64x2_t x, const struct v_log1p_data *d)
+{
+ /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several
+ modifications:
+ - No special-case handling - this should be dealt with by the caller.
+ - Pairwise Horner polynomial evaluation for improved accuracy.
+ - Optionally simulate the shortcut for k=0, used in the scalar routine,
+ using v_sel, for improved accuracy when the argument to log1p is close to
+ 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in
+ the source of the caller before including this file.
+ See v_log1p_2u5.c for details of the algorithm. */
+ float64x2_t m = vaddq_f64 (x, v_f64 (1));
+ uint64x2_t mi = vreinterpretq_u64_f64 (m);
+ uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
+
+ int64x2_t ki
+ = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
+ float64x2_t k = vcvtq_f64_s64 (ki);
+
+ /* Reduce m to [sqrt(2)/2, sqrt(2)]; f is that reduced value minus 1. */
+ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
+ uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
+ float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
+
+ /* Correction term c/m, with c = x - (m - 1). */
+ float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
+
+#ifndef WANT_V_LOG1P_K0_SHORTCUT
+#error \
+ "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_V_LOG1P_K0_SHORTCUT
+ /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+ that the approximation is solely the polynomial. */
+ uint64x2_t k0 = vceqzq_f64 (k);
+ cm = v_zerofy_f64 (cm, k0);
+ f = vbslq_f64 (k0, x, f);
+#endif
+
+ /* Approximate log1p(f) on the reduced input using a polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
+
+ /* Assemble log1p(x) = k * ln2 + log1p(f) + c/m. */
+ float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
+ float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
+ return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/v_log1pf_inline.h b/sysdeps/aarch64/fpu/v_log1pf_inline.h
new file mode 100644
index 0000000000000000..643a6cdcfc498970
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_log1pf_inline.h
@@ -0,0 +1,78 @@
+/* Helper for single-precision Advanced SIMD routines which depend on log1p
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_V_LOG1PF_INLINE_H
+#define AARCH64_FPU_V_LOG1PF_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+struct v_log1pf_data
+{
+ float32x4_t poly[8], ln2;
+ uint32x4_t four;
+ int32x4_t three_quarters;
+};
+
+/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
+ (1, -0.5) are not stored as they can be generated more efficiently. */
+#define V_LOG1PF_CONSTANTS_TABLE \
+ { \
+ .poly \
+ = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \
+ V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \
+ V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \
+ .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \
+ .three_quarters = V4 (0x3f400000) \
+ }
+
+static inline float32x4_t
+eval_poly (float32x4_t m, const float32x4_t *c)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine
+ uses split Estrin, but this way reduces register pressure in the calling
+ routine). */
+ float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]);
+ float32x4_t m2 = vmulq_f32 (m, m);
+ q = vfmaq_f32 (m, m2, q);
+ float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1);
+ p = vmulq_f32 (m2, p);
+ return vfmaq_f32 (q, m2, p);
+}
+
+static inline float32x4_t
+log1pf_inline (float32x4_t x, const struct v_log1pf_data d)
+{
+ /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
+ special-case handling. See that file for details of the algorithm. */
+ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+ int32x4_t k
+ = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters),
+ v_s32 (0xff800000));
+ uint32x4_t ku = vreinterpretq_u32_s32 (k);
+ float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku));
+ float32x4_t m_scale
+ = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+ m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+ float32x4_t p = eval_poly (m_scale, d.poly);
+ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
+ return vfmaq_f32 (p, scale_back, d.ln2);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/v_math.h b/sysdeps/aarch64/fpu/v_math.h
index d4d78bc4027abebb..12824fce8c698cf4 100644
--- a/sysdeps/aarch64/fpu/v_math.h
+++ b/sysdeps/aarch64/fpu/v_math.h
@@ -108,6 +108,11 @@ v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
p[2] ? f (x1[2], x2[2]) : y[2],
p[3] ? f (x1[3], x2[3]) : y[3] };
}
+static inline float32x4_t
+v_zerofy_f32 (float32x4_t x, uint32x4_t mask)
+{
+ return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), mask));
+}
static inline float64x2_t
v_f64 (double x)
@@ -167,5 +172,10 @@ v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0],
p[1] ? f (x1[1], x2[1]) : y[1] };
}
+static inline float64x2_t
+v_zerofy_f64 (float64x2_t x, uint64x2_t mask)
+{
+ return vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), mask));
+}
#endif
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 48d747ad5793be96..1646cdbdd22d93d9 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -34,11 +34,19 @@ double: 2
float: 2
ldouble: 4
+Function: "acosh_advsimd":
+double: 2
+float: 2
+
Function: "acosh_downward":
double: 2
float: 2
ldouble: 3
+Function: "acosh_sve":
+double: 2
+float: 2
+
Function: "acosh_towardzero":
double: 2
float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index f66da42c3630bf48..f5aaa519f2c8663e 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -73,12 +73,17 @@ GLIBC_2.39 _ZGVsMxv_tan F
GLIBC_2.39 _ZGVsMxv_tanf F
GLIBC_2.39 _ZGVsMxvv_atan2 F
GLIBC_2.39 _ZGVsMxvv_atan2f F
+GLIBC_2.40 _ZGVnN2v_acosh F
+GLIBC_2.40 _ZGVnN2v_acoshf F
GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
+GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
+GLIBC_2.40 _ZGVsMxv_acosh F
+GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F

303
glibc-RHEL-118273-30.patch Normal file
View File

@ -0,0 +1,303 @@
commit c0ff447edf19bd4630fe79adf5e8b896405b059f
Author: Luna Lamb <luna.lamb@arm.com>
Date: Thu Feb 13 17:54:46 2025 +0000
Aarch64: Improve codegen in SVE exp and users, and update expf_inline
Use unpredicated muls, and improve memory access.
7%, 3% and 1% improvement in throughput microbenchmark on Neoverse V1,
for exp, exp2 and cosh respectively.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
index 919f34604a452b4a..e375dd8a3407feb2 100644
--- a/sysdeps/aarch64/fpu/cosh_sve.c
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -23,7 +23,7 @@ static const struct data
{
float64_t poly[3];
float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
- uint64_t index_mask, special_bound;
+ uint64_t special_bound;
} data = {
.poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
0x1.5555576a59599p-5, },
@@ -35,14 +35,16 @@ static const struct data
.shift = 0x1.8p+52,
.thres = 704.0,
- .index_mask = 0xff,
/* 0x1.6p9, above which exp overflows. */
.special_bound = 0x4086000000000000,
};
static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special)
{
+ svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
+ svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
+ svfloat64_t y = svadd_x (pg, half_t, half_over_t);
return sv_call_f64 (cosh, x, y, special);
}
@@ -60,12 +62,12 @@ exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
svuint64_t u = svreinterpret_u64 (z);
svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
- svuint64_t i = svand_x (pg, u, d->index_mask);
+ svuint64_t i = svand_x (svptrue_b64 (), u, 0xff);
svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
y = svmla_x (pg, sv_f64 (1.0), r, y);
- y = svmul_x (pg, r, y);
+ y = svmul_x (svptrue_b64 (), r, y);
/* s = 2^(n/N). */
u = svld1_gather_index (pg, __v_exp_tail_data, i);
@@ -94,12 +96,12 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
/* Up to the point that exp overflows, we can use it to calculate cosh by
exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
svfloat64_t t = exp_inline (ax, pg, d);
- svfloat64_t half_t = svmul_x (pg, t, 0.5);
- svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
/* Fall back to scalar for any special cases. */
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+ return special_case (x, pg, t, special);
+ svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
+ svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
return svadd_x (pg, half_t, half_over_t);
}
diff --git a/sysdeps/aarch64/fpu/exp10_sve.c b/sysdeps/aarch64/fpu/exp10_sve.c
index ddf64708cb1773cd..bfd3fb9e1948a3b8 100644
--- a/sysdeps/aarch64/fpu/exp10_sve.c
+++ b/sysdeps/aarch64/fpu/exp10_sve.c
@@ -18,21 +18,23 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
#define SpecialBound 307.0 /* floor (log10 (2^1023)). */
static const struct data
{
- double poly[5];
+ double c1, c3, c2, c4, c0;
double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound;
} data = {
/* Coefficients generated using Remez algorithm.
rel error: 0x1.9fcb9b3p-60
abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ]
max ulp err 0.52 +0.5. */
- .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1,
- 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 },
+ .c0 = 0x1.26bb1bbb55516p1,
+ .c1 = 0x1.53524c73cd32ap1,
+ .c2 = 0x1.0470591daeafbp1,
+ .c3 = 0x1.2bd77b1361ef6p0,
+ .c4 = 0x1.142b5d54e9621p-1,
/* 1.5*2^46+1023. This value is further explained below. */
.shift = 0x1.800000000ffc0p+46,
.log10_2 = 0x1.a934f0979a371p1, /* 1/log2(10). */
@@ -70,9 +72,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
/* |n| > 1280 => 2^(n) overflows. */
svbool_t p_cmp = svacgt (pg, n, d->scale_thres);
- svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
svfloat64_t r2 = svmla_x (pg, s2, s2, y);
- svfloat64_t r0 = svmul_x (pg, r2, s1);
+ svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
return svsel (p_cmp, r1, r0);
}
@@ -103,11 +105,14 @@ svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
comes at significant performance cost. */
svuint64_t u = svreinterpret_u64 (z);
svfloat64_t scale = svexpa (u);
-
+ svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
/* Approximate exp10(r) using polynomial. */
- svfloat64_t r2 = svmul_x (pg, r, r);
- svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2,
- sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1));
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+ svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+ svfloat64_t p14 = svmla_x (pg, p12, p34, r2);
+
+ svfloat64_t y = svmla_x (pg, svmul_x (svptrue_b64 (), r, d->c0), r2, p14);
/* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound
multiplication may overflow, so use special case routine. */
diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c
index 22848ebfa5ac21d8..5dfb77cdbc2f6a51 100644
--- a/sysdeps/aarch64/fpu/exp2_sve.c
+++ b/sysdeps/aarch64/fpu/exp2_sve.c
@@ -18,7 +18,6 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
#define N (1 << V_EXP_TABLE_BITS)
@@ -27,15 +26,15 @@
static const struct data
{
- double poly[4];
+ double c0, c2;
+ double c1, c3;
double shift, big_bound, uoflow_bound;
} data = {
/* Coefficients are computed using Remez algorithm with
minimisation of the absolute error. */
- .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5,
- 0x1.3b2abf5571ad8p-7 },
- .shift = 0x1.8p52 / N,
- .uoflow_bound = UOFlowBound,
+ .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3,
+ .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7,
+ .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound,
.big_bound = BigBound,
};
@@ -67,9 +66,9 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
/* |n| > 1280 => 2^(n) overflows. */
svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
- svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
svfloat64_t r2 = svmla_x (pg, s2, s2, y);
- svfloat64_t r0 = svmul_x (pg, r2, s1);
+ svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
return svsel (p_cmp, r1, r0);
}
@@ -99,11 +98,14 @@ svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
+ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
/* Approximate exp2(r) using polynomial. */
- svfloat64_t r2 = svmul_x (pg, r, r);
- svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly);
- svfloat64_t y = svmul_x (pg, r, p);
-
+ /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
+ svfloat64_t p = svmla_x (pg, p01, p23, r2);
+ svfloat64_t y = svmul_x (svptrue_b64 (), r, p);
/* Assemble exp2(x) = exp2(r) * scale. */
if (__glibc_unlikely (svptest_any (pg, special)))
return special_case (pg, scale, y, kd, d);
diff --git a/sysdeps/aarch64/fpu/exp_sve.c b/sysdeps/aarch64/fpu/exp_sve.c
index aabaaa1d61dbab27..b2421d493f2e119f 100644
--- a/sysdeps/aarch64/fpu/exp_sve.c
+++ b/sysdeps/aarch64/fpu/exp_sve.c
@@ -21,12 +21,15 @@
static const struct data
{
- double poly[4];
+ double c0, c2;
+ double c1, c3;
double ln2_hi, ln2_lo, inv_ln2, shift, thres;
+
} data = {
- .poly = { /* ulp error: 0.53. */
- 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5,
- 0x1.1111266d28935p-7 },
+ .c0 = 0x1.fffffffffdbcdp-2,
+ .c1 = 0x1.555555555444cp-3,
+ .c2 = 0x1.555573c6a9f7dp-5,
+ .c3 = 0x1.1111266d28935p-7,
.ln2_hi = 0x1.62e42fefa3800p-1,
.ln2_lo = 0x1.ef35793c76730p-45,
/* 1/ln2. */
@@ -36,7 +39,6 @@ static const struct data
.thres = 704.0,
};
-#define C(i) sv_f64 (d->poly[i])
#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
/* SpecialBias1 + SpecialBias1 = asuint(1.0). */
#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
@@ -56,20 +58,20 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n)
svuint64_t b
= svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */
- /* Set s1 to generate overflow depending on sign of exponent n. */
- svfloat64_t s1 = svreinterpret_f64 (
- svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */
- /* Offset s to avoid overflow in final result if n is below threshold. */
+ /* Set s1 to generate overflow depending on sign of exponent n,
+ ie. s1 = 0x70...0 - b. */
+ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+ /* Offset s to avoid overflow in final result if n is below threshold.
+ ie. s2 = as_u64 (s) - 0x3010...0 + b. */
svfloat64_t s2 = svreinterpret_f64 (
- svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2),
- b)); /* as_u64 (s) - 0x3010...0 + b. */
+ svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
/* |n| > 1280 => 2^(n) overflows. */
svbool_t p_cmp = svacgt (pg, n, 1280.0);
- svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
svfloat64_t r2 = svmla_x (pg, s2, s2, y);
- svfloat64_t r0 = svmul_x (pg, r2, s1);
+ svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
return svsel (p_cmp, r1, r0);
}
@@ -103,16 +105,16 @@ svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
svuint64_t u = svreinterpret_u64 (z);
svfloat64_t n = svsub_x (pg, z, d->shift);
-
+ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
/* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. */
svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
svfloat64_t r = svmls_lane (x, n, ln2, 0);
r = svmls_lane (r, n, ln2, 1);
/* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */
- svfloat64_t r2 = svmul_x (pg, r, r);
- svfloat64_t p01 = svmla_x (pg, C (0), C (1), r);
- svfloat64_t p23 = svmla_x (pg, C (2), C (3), r);
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
svfloat64_t y = svmla_x (pg, r, p04, r2);
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
index 6166df65533555a6..75781fb4ddcb9790 100644
--- a/sysdeps/aarch64/fpu/sv_expf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -61,7 +61,7 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
/* scale = 2^(n/N). */
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
- /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
+ /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. */
svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
@@ -71,5 +71,4 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
return svmla_x (pg, scale, scale, poly);
}
-
#endif

194
glibc-RHEL-118273-31.patch Normal file
View File

@ -0,0 +1,194 @@
commit 8f0e7fe61e0a2ad5ed777933703ce09053810ec4
Author: Luna Lamb <luna.lamb@arm.com>
Date: Thu Feb 13 17:52:09 2025 +0000
Aarch64: Improve codegen in SVE asinh
Use unpredicated muls, use lanewise mla's and improve memory access.
1% regression in throughput microbenchmark on Neoverse V1.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c
index 28dc5c458750bac4..fe8715e06c92ac51 100644
--- a/sysdeps/aarch64/fpu/asinh_sve.c
+++ b/sysdeps/aarch64/fpu/asinh_sve.c
@@ -18,36 +18,49 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
#define SignMask (0x8000000000000000)
#define One (0x3ff0000000000000)
#define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). */
+#define IndexMask (((1 << V_LOG_TABLE_BITS) - 1) << 1)
static const struct data
{
- double poly[18];
- double ln2, p3, p1, p4, p0, p2;
- uint64_t n;
- uint64_t off;
+ double even_coeffs[9];
+ double ln2, p3, p1, p4, p0, p2, c1, c3, c5, c7, c9, c11, c13, c15, c17;
+ uint64_t off, mask;
} data = {
- /* Polynomial generated using Remez on [2^-26, 1]. */
- .poly
- = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
- 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
- -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
- 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
- -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
- 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18 },
+ /* Polynomial generated using Remez on [2^-26, 1]. */
+ .even_coeffs ={
+ -0x1.55555555554a7p-3,
+ -0x1.6db6db68332e6p-5,
+ -0x1.6e8b8b654a621p-6,
+ -0x1.c9871d10885afp-7,
+ -0x1.3ddca533e9f54p-7,
+ -0x1.b90c7099dd397p-8,
+ -0x1.d217026a669ecp-9,
+ -0x1.e0f37daef9127p-11,
+ -0x1.021a48685e287p-14, },
+
+ .c1 = 0x1.3333333326c7p-4,
+ .c3 = 0x1.f1c71b26fb40dp-6,
+ .c5 = 0x1.1c4daa9e67871p-6,
+ .c7 = 0x1.7a16e8d9d2ecfp-7,
+ .c9 = 0x1.0becef748dafcp-7,
+ .c11 = 0x1.541f2bb1ffe51p-8,
+ .c13 = 0x1.0b5c7977aaf7p-9,
+ .c15 = 0x1.388b5fe542a6p-12,
+ .c17 = 0x1.93d4ba83d34dap-18,
+
.ln2 = 0x1.62e42fefa39efp-1,
.p0 = -0x1.ffffffffffff7p-2,
.p1 = 0x1.55555555170d4p-2,
.p2 = -0x1.0000000399c27p-2,
.p3 = 0x1.999b2e90e94cap-3,
.p4 = -0x1.554e550bd501ep-3,
- .n = 1 << V_LOG_TABLE_BITS,
- .off = 0x3fe6900900000000
+ .off = 0x3fe6900900000000,
+ .mask = 0xfffULL << 52,
};
static svfloat64_t NOINLINE
@@ -64,11 +77,10 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
of the algorithm used. */
svuint64_t ix = svreinterpret_u64 (x);
- svuint64_t tmp = svsub_x (pg, ix, d->off);
- svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)),
- (d->n - 1) << 1);
- svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
- svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+ svuint64_t i_off = svsub_x (pg, ix, d->off);
+ svuint64_t i
+ = svand_x (pg, svlsr_x (pg, i_off, (51 - V_LOG_TABLE_BITS)), IndexMask);
+ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, i_off, d->mask));
svfloat64_t z = svreinterpret_f64 (iz);
svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
@@ -78,14 +90,14 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1);
svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z);
- svfloat64_t kd = svcvt_f64_x (pg, k);
+ svfloat64_t kd
+ = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (i_off), 52));
svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0);
- svfloat64_t r2 = svmul_x (pg, r, r);
-
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1);
-
svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0);
+
y = svmla_lane (y, r2, p1_p4, 1);
y = svmla_x (pg, p, r2, y);
y = svmla_x (pg, hi, r2, y);
@@ -111,7 +123,6 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
svuint64_t iax = svbic_x (pg, ix, SignMask);
svuint64_t sign = svand_x (pg, ix, SignMask);
svfloat64_t ax = svreinterpret_f64 (iax);
-
svbool_t ge1 = svcmpge (pg, iax, One);
svbool_t special = svcmpge (pg, iax, Thres);
@@ -120,7 +131,7 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
svfloat64_t option_1 = sv_f64 (0);
if (__glibc_likely (svptest_any (pg, ge1)))
{
- svfloat64_t x2 = svmul_x (pg, ax, ax);
+ svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax);
option_1 = __sv_log_inline (
svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg);
}
@@ -130,21 +141,53 @@ svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
The largest observed error in this region is 1.51 ULPs:
_ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1
want 0x1.c1e649ee2681dp-1. */
+
svfloat64_t option_2 = sv_f64 (0);
if (__glibc_likely (svptest_any (pg, svnot_z (pg, ge1))))
{
- svfloat64_t x2 = svmul_x (pg, ax, ax);
- svfloat64_t x4 = svmul_x (pg, x2, x2);
- svfloat64_t p = sv_pw_horner_17_f64_x (pg, x2, x4, d->poly);
- option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax));
+ svfloat64_t x2 = svmul_x (svptrue_b64 (), ax, ax);
+ svfloat64_t x4 = svmul_x (svptrue_b64 (), x2, x2);
+ /* Order-17 Pairwise Horner scheme. */
+ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+ svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+ svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+ svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+
+ svfloat64_t p01 = svmla_lane (sv_f64 (d->even_coeffs[0]), x2, c13, 0);
+ svfloat64_t p23 = svmla_lane (sv_f64 (d->even_coeffs[1]), x2, c13, 1);
+ svfloat64_t p45 = svmla_lane (sv_f64 (d->even_coeffs[2]), x2, c57, 0);
+ svfloat64_t p67 = svmla_lane (sv_f64 (d->even_coeffs[3]), x2, c57, 1);
+ svfloat64_t p89 = svmla_lane (sv_f64 (d->even_coeffs[4]), x2, c911, 0);
+ svfloat64_t p1011 = svmla_lane (sv_f64 (d->even_coeffs[5]), x2, c911, 1);
+ svfloat64_t p1213
+ = svmla_lane (sv_f64 (d->even_coeffs[6]), x2, c1315, 0);
+ svfloat64_t p1415
+ = svmla_lane (sv_f64 (d->even_coeffs[7]), x2, c1315, 1);
+ svfloat64_t p1617 = svmla_x (pg, sv_f64 (d->even_coeffs[8]), x2, d->c17);
+
+ svfloat64_t p = svmla_x (pg, p1415, x4, p1617);
+ p = svmla_x (pg, p1213, x4, p);
+ p = svmla_x (pg, p1011, x4, p);
+ p = svmla_x (pg, p89, x4, p);
+
+ p = svmla_x (pg, p67, x4, p);
+ p = svmla_x (pg, p45, x4, p);
+
+ p = svmla_x (pg, p23, x4, p);
+
+ p = svmla_x (pg, p01, x4, p);
+
+ option_2 = svmla_x (pg, ax, p, svmul_x (svptrue_b64 (), x2, ax));
}
- /* Choose the right option for each lane. */
- svfloat64_t y = svsel (ge1, option_1, option_2);
-
if (__glibc_unlikely (svptest_any (pg, special)))
return special_case (
- x, svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)),
+ x,
+ svreinterpret_f64 (sveor_x (
+ pg, svreinterpret_u64 (svsel (ge1, option_1, option_2)), sign)),
special);
+
+ /* Choose the right option for each lane. */
+ svfloat64_t y = svsel (ge1, option_1, option_2);
return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
}

531
glibc-RHEL-118273-32.patch Normal file
View File

@ -0,0 +1,531 @@
commit ce2f26a22e6b6f5c108d156afd9b43a452bb024c
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Tue Dec 31 18:07:36 2024 +0000
AArch64: Remove PTR_ARG/SIZE_ARG defines
This series removes various ILP32 defines that are now
no longer needed.
Remove PTR_ARG/SIZE_ARG.
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Conflicts:
sysdeps/aarch64/dl-start.S
(Fixup context to apply without out-of-scope dependency 01f52b11de)
sysdeps/aarch64/multiarch/memcpy_thunderx.S
(Dropped by upstream commit e162ab2)
sysdeps/aarch64/multiarch/memcpy_oryon1.S
(Skipped: file from 4dc83cac is out-of-scope)
sysdeps/aarch64/multiarch/memset_oryon1.S
(Skipped: file from 2f1f7a5f is out-of-scope)
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 7b6add751e6bd96b..452ba0da6d788ce8 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -47,8 +47,6 @@ ENTRY (__longjmp)
cfi_offset(d14, JB_D14<<3)
cfi_offset(d15, JB_D15<<3)
- PTR_ARG (0)
-
#if IS_IN(libc)
/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */
# if HAVE_AARCH64_PAC_RET
diff --git a/sysdeps/aarch64/__mtag_tag_region.S b/sysdeps/aarch64/__mtag_tag_region.S
index 22e8d8b75372c8aa..90ac17ced4801f21 100644
--- a/sysdeps/aarch64/__mtag_tag_region.S
+++ b/sysdeps/aarch64/__mtag_tag_region.S
@@ -40,9 +40,6 @@
#define zva_val x4
ENTRY (__libc_mtag_tag_region)
- PTR_ARG (0)
- SIZE_ARG (1)
-
add dstend, dstin, count
cmp count, 96
diff --git a/sysdeps/aarch64/__mtag_tag_zero_region.S b/sysdeps/aarch64/__mtag_tag_zero_region.S
index 566698e9146e7da8..e975a2f8bdb85ae0 100644
--- a/sysdeps/aarch64/__mtag_tag_zero_region.S
+++ b/sysdeps/aarch64/__mtag_tag_zero_region.S
@@ -40,9 +40,6 @@
#define zva_val x4
ENTRY (__libc_mtag_tag_zero_region)
- PTR_ARG (0)
- SIZE_ARG (1)
-
add dstend, dstin, count
cmp count, 96
diff --git a/sysdeps/aarch64/dl-start.S b/sysdeps/aarch64/dl-start.S
index d645484e79858013..b7ac6c31432e07c9 100644
--- a/sysdeps/aarch64/dl-start.S
+++ b/sysdeps/aarch64/dl-start.S
@@ -26,7 +26,6 @@ ENTRY (_start)
mov x30, #0
mov x0, sp
- PTR_ARG (0)
bl _dl_start
/* Returns user entry point in x0. */
mov PTR_REG (21), PTR_REG (0)
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 9b253b39dd1d9d46..0aeaf64edd2594f1 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -75,7 +75,6 @@
.align 2
_dl_tlsdesc_return:
BTI_C
- PTR_ARG (0)
ldr PTR_REG (0), [x0, #PTR_SIZE]
RET
cfi_endproc
@@ -99,7 +98,6 @@ _dl_tlsdesc_undefweak:
BTI_C
str x1, [sp, #-16]!
cfi_adjust_cfa_offset (16)
- PTR_ARG (0)
ldr PTR_REG (0), [x0, #PTR_SIZE]
mrs x1, tpidr_el0
sub PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -145,7 +143,6 @@ _dl_tlsdesc_undefweak:
.align 2
_dl_tlsdesc_dynamic:
BTI_C
- PTR_ARG (0)
/* Save just enough registers to support fast path, if we fall
into slow path we will save additional registers. */
diff --git a/sysdeps/aarch64/memchr.S b/sysdeps/aarch64/memchr.S
index a9fa40519c78b7df..7173c7fafa7d6eb5 100644
--- a/sysdeps/aarch64/memchr.S
+++ b/sysdeps/aarch64/memchr.S
@@ -57,8 +57,6 @@
exactly which byte matched. */
ENTRY (MEMCHR)
- PTR_ARG (0)
- SIZE_ARG (2)
bic src, srcin, 15
cbz cntin, L(nomatch)
ld1 {vdata.16b}, [src]
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index 5afa79494bf9cb7f..68dfa604f4b1bd43 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -44,10 +44,6 @@
ENTRY (memcmp)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
cmp limit, 16
b.lo L(less16)
ldp data1, data3, [src1]
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index f21c21d3f2a21d89..fba93faeba52447f 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -70,10 +70,6 @@
from the end. */
ENTRY (MEMCPY)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
add srcend, src, count
add dstend, dstin, count
cmp count, 128
@@ -187,10 +183,6 @@ libc_hidden_builtin_def (MEMCPY)
ENTRY (MEMMOVE)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
add srcend, src, count
add dstend, dstin, count
cmp count, 128
diff --git a/sysdeps/aarch64/memrchr.S b/sysdeps/aarch64/memrchr.S
index c5274f5ebf595268..1bd3e230ca197581 100644
--- a/sysdeps/aarch64/memrchr.S
+++ b/sysdeps/aarch64/memrchr.S
@@ -55,8 +55,6 @@
exactly which byte matched. */
ENTRY (__memrchr)
- PTR_ARG (0)
- SIZE_ARG (2)
add end, srcin, cntin
sub endm1, end, 1
bic src, endm1, 15
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index 71814d0b2f6dd3a7..496ad332882a7e3d 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -40,9 +40,6 @@
#define dstend2 x5
ENTRY (MEMSET)
- PTR_ARG (0)
- SIZE_ARG (2)
-
dup v0.16B, valw
cmp count, 16
b.lo L(set_small)
diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S
index 0a65139b0810e95b..b47059de1ee61f71 100644
--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S
+++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S
@@ -60,9 +60,6 @@
ENTRY (__memchr_nosimd)
- PTR_ARG (0)
- SIZE_ARG (2)
-
/* Do not dereference srcin if no bytes to compare. */
cbz cntin, L(none_chr)
diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
index d826aafd80ed7b0b..fa693f7c3a5c28a3 100644
--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S
@@ -96,10 +96,6 @@
ENTRY (__memcpy_a64fx)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
cntb vlen
cmp n, vlen, lsl 1
b.hi L(copy_small)
@@ -236,10 +232,6 @@ END (__memcpy_a64fx)
ENTRY_ALIGN (__memmove_a64fx, 4)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
/* Fast case for up to 2 vectors. */
cntb vlen
cmp n, vlen, lsl 1
diff --git a/sysdeps/aarch64/multiarch/memcpy_mops.S b/sysdeps/aarch64/multiarch/memcpy_mops.S
index b094af3d22bc4aeb..2c426f008e699101 100644
--- a/sysdeps/aarch64/multiarch/memcpy_mops.S
+++ b/sysdeps/aarch64/multiarch/memcpy_mops.S
@@ -26,10 +26,6 @@
*/
ENTRY (__memcpy_mops)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
mov x3, x0
.inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */
.inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */
diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S
index 3ce49d79ecdb94e0..26375b47174f1ba8 100644
--- a/sysdeps/aarch64/multiarch/memcpy_sve.S
+++ b/sysdeps/aarch64/multiarch/memcpy_sve.S
@@ -61,10 +61,6 @@
.arch armv8.2-a+sve
ENTRY (__memcpy_sve)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
cmp count, 128
b.hi L(copy_long)
cntb vlen
@@ -144,10 +140,6 @@ END (__memcpy_sve)
ENTRY (__memmove_sve)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
cmp count, 128
b.hi L(move_long)
cntb vlen
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index 5d8438a82ea2a3be..02ea27f356fe8ea1 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -67,10 +67,6 @@
ENTRY (__memmove_thunderx)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
sub tmp1, dstin, src
cmp count, 96
ccmp tmp1, count, 2, hi
diff --git a/sysdeps/aarch64/multiarch/memmove_mops.S b/sysdeps/aarch64/multiarch/memmove_mops.S
index 7df0d22454ead375..229fccd9d5a7abd2 100644
--- a/sysdeps/aarch64/multiarch/memmove_mops.S
+++ b/sysdeps/aarch64/multiarch/memmove_mops.S
@@ -26,10 +26,6 @@
*/
ENTRY (__memmove_mops)
- PTR_ARG (0)
- PTR_ARG (1)
- SIZE_ARG (2)
-
mov x3, x0
.inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */
.inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
index 2e6d882fc931a882..9ea329a82ae7d0f6 100644
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
@@ -48,8 +48,6 @@
#define BTI_C
ENTRY (__memset_a64fx)
- PTR_ARG (0)
- SIZE_ARG (2)
cntb vector_length
dup z0.b, valw
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 6d714ed0e1b396ef..5c33280e0f8bf85a 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -28,9 +28,6 @@
ENTRY (__memset_emag)
- PTR_ARG (0)
- SIZE_ARG (2)
-
bfi valw, valw, 8, 8
bfi valw, valw, 16, 16
bfi val, val, 32, 32
diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S
index 7b215501376cbe03..93f3bfb8cf7238a5 100644
--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S
+++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -28,9 +28,6 @@
ENTRY (__memset_kunpeng)
- PTR_ARG (0)
- SIZE_ARG (2)
-
dup v0.16B, valw
add dstend, dstin, count
diff --git a/sysdeps/aarch64/multiarch/memset_mops.S b/sysdeps/aarch64/multiarch/memset_mops.S
index e879c81ab2d047b1..f13a0b561078137e 100644
--- a/sysdeps/aarch64/multiarch/memset_mops.S
+++ b/sysdeps/aarch64/multiarch/memset_mops.S
@@ -26,9 +26,6 @@
*/
ENTRY (__memset_mops)
- PTR_ARG (0)
- SIZE_ARG (2)
-
mov x3, x0
.inst 0x19c10443 /* setp [x3]!, x2!, x1 */
.inst 0x19c14443 /* setm [x3]!, x2!, x1 */
diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S
index 67dcc94adc587928..3118cd00663b0b25 100644
--- a/sysdeps/aarch64/multiarch/strlen_asimd.S
+++ b/sysdeps/aarch64/multiarch/strlen_asimd.S
@@ -87,7 +87,6 @@
character, return the length, if not, continue in the main loop. */
ENTRY (__strlen_asimd)
- PTR_ARG (0)
and tmp1, srcin, MIN_PAGE_SIZE - 1
cmp tmp1, MIN_PAGE_SIZE - 32
b.hi L(page_cross)
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index 43fdb1b2fb1b7b78..92dc34e3e9a2650c 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -34,8 +34,6 @@ END (_setjmp)
libc_hidden_def (_setjmp)
ENTRY (__sigsetjmp)
- PTR_ARG (0)
-
1:
stp x19, x20, [x0, #JB_X19<<3]
stp x21, x22, [x0, #JB_X21<<3]
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index ca4c99e6bf9ac960..bc57283361e172ab 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -52,7 +52,6 @@
If it is not a multiple of 4, there was no match. */
ENTRY (strchr)
- PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index e1a1c7eb4383e0f6..09e092bf5f847a7f 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -51,7 +51,6 @@
exactly which byte matched. */
ENTRY (__strchrnul)
- PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index 47f6fb1448c464bf..7bf87073be304e0f 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -62,8 +62,6 @@
NUL too in big-endian, byte-reverse the data before the NUL check. */
ENTRY(strcmp)
- PTR_ARG (0)
- PTR_ARG (1)
sub off2, src2, src1
mov zeroones, REP8_01
and tmp, src1, 7
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 705354060055a45e..62fb0248fa5a7ba3 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -69,8 +69,6 @@
exactly which byte matched. */
ENTRY (STRCPY)
- PTR_ARG (0)
- PTR_ARG (1)
bic src, srcin, 15
ld1 {vdata.16b}, [src]
cmeq vhas_nul.16b, vdata.16b, 0
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index 352fb40d3abbb44b..0d10b6efb7b31e54 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -49,7 +49,6 @@
identifies the first zero byte. */
ENTRY (STRLEN)
- PTR_ARG (0)
bic src, srcin, 15
ld1 {vdata.16b}, [src]
cmeq vhas_nul.16b, vdata.16b, 0
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index e4fb3506a80756b3..2a2264c0e5427225 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -49,8 +49,6 @@
identifies the first zero byte. */
ENTRY (__strnlen)
- PTR_ARG (0)
- SIZE_ARG (1)
bic src, srcin, 15
cbz cntin, L(nomatch)
ld1 {vdata.16b}, [src]
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index e52c9b275347978c..402bce444ef3bb28 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -55,7 +55,6 @@
if the relevant byte matched the NUL end of string. */
ENTRY (strrchr)
- PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
movi vrepmask.16b, 0x33
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 0e7ee24e68c85377..fed19acc2f78351f 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -33,12 +33,6 @@
*/
.text
ENTRY(__clone)
- PTR_ARG (0)
- PTR_ARG (1)
- PTR_ARG (3)
- PTR_ARG (4)
- PTR_ARG (5)
- PTR_ARG (6)
/* Save args for the child. */
mov x10, x0
mov x11, x2
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone3.S b/sysdeps/unix/sysv/linux/aarch64/clone3.S
index 92d69a5430518cbc..9b00b6b8853e9b8b 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone3.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone3.S
@@ -36,10 +36,6 @@
.text
ENTRY(__clone3)
- PTR_ARG (0)
- PTR_ARG (1)
- PTR_ARG (3)
- PTR_ARG (4)
/* Save args for the child. */
mov x10, x0 /* cl_args */
mov x11, x2 /* func */
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index e5b69c9a82b7a448..862bd67aa484ae1a 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -30,7 +30,6 @@
.text
ENTRY(__getcontext)
- PTR_ARG (0)
/* The saved context will return to the getcontext() call point
with a return value of 0 */
str xzr, [x0, oX0 + 0 * SZREG]
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index ba659438c564dc3b..8c072781cdf98c2b 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -34,7 +34,6 @@
.text
ENTRY (__setcontext)
- PTR_ARG (0)
/* Save a copy of UCP. */
mov x9, x0
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index f049140d35b79ba6..7000f220368bb094 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -27,7 +27,6 @@
.text
ENTRY(__swapcontext)
- PTR_ARG (0)
/* Set the value returned when swapcontext() returns in this context.
And set up x1 to become the return address of the caller, so we
can return there with a normal RET instead of an indirect jump. */

113
glibc-RHEL-118273-33.patch Normal file
View File

@ -0,0 +1,113 @@
commit cf56eb28fa277d9dbb301654682ca89f71c30a48
Author: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Tue Mar 18 17:07:31 2025 +0000
AArch64: Optimize algorithm in users of SVE expf helper
Polynomial order was unnecessarily high, unlocking multiple
optimizations.
Max error for new SVE expf is 0.88 +0.5ULP.
Max error for new SVE coshf is 2.56 +0.5ULP.
Performance improvement on Neoverse V1: expf (30%), coshf (26%).
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c
index 7ad6efa0fc218278..508c0790ee89e0cd 100644
--- a/sysdeps/aarch64/fpu/coshf_sve.c
+++ b/sysdeps/aarch64/fpu/coshf_sve.c
@@ -39,9 +39,9 @@ special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
}
/* Single-precision vector cosh, using vector expf.
- Maximum error is 2.77 ULP:
- _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
- want 0x1.e4594cp+2. */
+ Maximum error is 2.56 +0.5 ULP:
+ _ZGVsMxv_coshf(-0x1.5b40f4p+1) got 0x1.e47748p+2
+ want 0x1.e4774ep+2. */
svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c
index da93e01b87e0e890..aee86a203379efb3 100644
--- a/sysdeps/aarch64/fpu/expf_sve.c
+++ b/sysdeps/aarch64/fpu/expf_sve.c
@@ -40,9 +40,9 @@ special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
}
/* Optimised single-precision SVE exp function.
- Worst-case error is 1.04 ulp:
- SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4
- want 0x1.ba74bap+4. */
+ Worst-case error is 0.88 +0.50 ULP:
+ _ZGVsMxv_expf(-0x1.bba276p-6) got 0x1.f25288p-1
+ want 0x1.f2528ap-1. */
svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
index 75781fb4ddcb9790..01fbb4d4c046eb3b 100644
--- a/sysdeps/aarch64/fpu/sv_expf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -24,50 +24,40 @@
struct sv_expf_data
{
- float c1, c3, inv_ln2;
- float ln2_lo, c0, c2, c4;
- float ln2_hi, shift;
+ float ln2_hi, ln2_lo, c1, null;
+ float inv_ln2, shift;
};
-/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
- compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */
+/* Shift is 1.5*2^17 + 127. */
#define SV_EXPF_DATA \
{ \
- /* Coefficients copied from the polynomial in AdvSIMD variant. */ \
- .c0 = 0x1.ffffecp-1f, .c1 = 0x1.fffdb6p-2f, .c2 = 0x1.555e66p-3f, \
- .c3 = 0x1.573e2ep-5f, .c4 = 0x1.0e4020p-7f, .inv_ln2 = 0x1.715476p+0f, \
- .ln2_hi = 0x1.62e4p-1f, .ln2_lo = 0x1.7f7d1cp-20f, \
- .shift = 0x1.803f8p17f, \
+ .c1 = 0.5f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
+ .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \
}
-#define C(i) sv_f32 (d->poly[i])
-
static inline svfloat32_t
expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
{
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
- svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_lo);
+ svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_hi);
/* n = round(x/(ln2/N)). */
svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift);
svfloat32_t n = svsub_x (pg, z, d->shift);
/* r = x - n*ln2/N. */
- svfloat32_t r = svmsb_x (pg, sv_f32 (d->ln2_hi), n, x);
+ svfloat32_t r = x;
r = svmls_lane (r, n, lane_consts, 0);
+ r = svmls_lane (r, n, lane_consts, 1);
/* scale = 2^(n/N). */
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
- /* poly(r) = exp(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4 + C4 r^5. */
- svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
- svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
+ /* poly(r) = exp(r) - 1 ~= r + 0.5 r^2. */
svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
- svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
- svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
- svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+ svfloat32_t poly = svmla_lane (r, r2, lane_consts, 2);
return svmla_x (pg, scale, scale, poly);
}

217
glibc-RHEL-118273-34.patch Normal file
View File

@ -0,0 +1,217 @@
commit 4352e2cc934b2874dba37397157bf890fcee455a
Author: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Fri Mar 28 14:27:45 2025 -0300
aarch64: Fix _dl_tlsdesc_dynamic unwind for pac-ret (BZ 32612)
When libgcc is built with pac-ret, it needs to authenticate the
unwinding frame based on CFI information. The _dl_tlsdesc_dynamic
uses a custom calling convention, where it is responsible to save
and restore all registers it might use (even volatile).
The pac-ret support added by 1be3d6eb823d8b952fa54b7bbc90cbecb8981380
was added only on the slow-path, but the fast path also adds DWARF
Register Rule Instruction (cfi_adjust_cfa_offset) since it requires
to save/restore some auxiliary registers. It seems that this is not
fully supported by either libgcc or the AArch64 ABI [1].
Instead, move paciasp/autiasp to function prologue/epilogue to be
used on both fast and slow paths.
I also corrected the _dl_tlsdesc_dynamic comment description; it was
copied from the i386 implementation without any adjustment.
Checked on aarch64-linux-gnu with a toolchain built with
--enable-standard-branch-protection on a system with pac-ret
support.
[1] https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#id1
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
Conflicts:
sysdeps/unix/sysv/linux/aarch64/Makefile
(Fixup context to apply without out-of-scope dependency f4d00dd60d)
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index 0aeaf64edd2594f1..36195c956855e024 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -119,20 +119,19 @@ _dl_tlsdesc_undefweak:
object referenced by the argument.
ptrdiff_t
- __attribute__ ((__regparm__ (1)))
_dl_tlsdesc_dynamic (struct tlsdesc *tdp)
{
struct tlsdesc_dynamic_arg *td = tdp->arg;
- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
+ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer() + TCBHEAD_DTV);
if (__builtin_expect (td->gen_count <= dtv[0].counter
&& (dtv[td->tlsinfo.ti_module].pointer.val
!= TLS_DTV_UNALLOCATED),
1))
return dtv[td->tlsinfo.ti_module].pointer.val
+ td->tlsinfo.ti_offset
- - __thread_pointer;
+ - __thread_pointer();
- return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+ return __tls_get_addr (&td->tlsinfo) - __thread_pointer();
}
*/
@@ -142,7 +141,12 @@ _dl_tlsdesc_undefweak:
cfi_startproc
.align 2
_dl_tlsdesc_dynamic:
+# if HAVE_AARCH64_PAC_RET
+ PACIASP
+ cfi_window_save
+# else
BTI_C
+# endif
/* Save just enough registers to support fast path, if we fall
into slow path we will save additional registers. */
@@ -173,6 +177,10 @@ _dl_tlsdesc_dynamic:
1:
ldp x3, x4, [sp, #16]
ldp x1, x2, [sp], #32
+# if HAVE_AARCH64_PAC_RET
+ AUTIASP
+ cfi_window_save
+# endif
cfi_adjust_cfa_offset (-32)
RET
2:
@@ -182,10 +190,6 @@ _dl_tlsdesc_dynamic:
/* Save the remaining registers that we must treat as caller save. */
cfi_restore_state
-# if HAVE_AARCH64_PAC_RET
- PACIASP
- cfi_window_save
-# endif
# define NSAVEXREGPAIRS 8
stp x29, x30, [sp,#-16*NSAVEXREGPAIRS]!
cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
@@ -236,10 +240,6 @@ _dl_tlsdesc_dynamic:
cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
cfi_restore (x29)
cfi_restore (x30)
-# if HAVE_AARCH64_PAC_RET
- AUTIASP
- cfi_window_save
-# endif
b 1b
cfi_endproc
.size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
diff --git a/sysdeps/unix/sysv/linux/aarch64/Makefile b/sysdeps/unix/sysv/linux/aarch64/Makefile
index 40b9a2e5dea1ea89..607a0c56d8dfad8d 100644
--- a/sysdeps/unix/sysv/linux/aarch64/Makefile
+++ b/sysdeps/unix/sysv/linux/aarch64/Makefile
@@ -1,3 +1,16 @@
+ifeq ($(subdir),elf)
+tests += \
+ tst-tlsdesc-pac \
+ # tests
+modules-names += \
+ tst-tlsdesc-pac-mod \
+ # modules-names
+
+LDFLAGS-tst-tlsdesc-pac = -rdynamic
+
+$(objpfx)tst-tlsdesc-pac.out: $(objpfx)tst-tlsdesc-pac-mod.so
+endif
+
ifeq ($(subdir),misc)
sysdep_headers += sys/elf.h
endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac-mod.c b/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac-mod.c
new file mode 100644
index 0000000000000000..d34c8beda9b1986d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac-mod.c
@@ -0,0 +1,27 @@
+/* AArch64 tests for unwinding TLSDESC (BZ 32612)
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+_Thread_local int foo;
+/* Make the TLS segment large enough to trigger _dl_tlsdesc_dynamic. */
+_Thread_local int foobar[1000];
+
+void
+bar (void)
+{
+ foo = 1;
+}
diff --git a/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac.c b/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac.c
new file mode 100644
index 0000000000000000..24d656aafc2784b4
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/tst-tlsdesc-pac.c
@@ -0,0 +1,48 @@
+/* AArch64 tests for unwinding TLSDESC (BZ 32612)
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <unwind.h>
+#include <support/xdlfcn.h>
+
+static _Unwind_Reason_Code
+unwind_callback (struct _Unwind_Context* context, void* closure)
+{
+ return _URC_NO_REASON;
+}
+
+/* Assume that TLS variable from tst-tlsdesc-pac-mod.so will trigger
+ the slow-path that allocates the required memory with malloc. */
+void *
+malloc (size_t s)
+{
+ _Unwind_Backtrace (unwind_callback, NULL);
+ return calloc (1, s);
+}
+
+static int
+do_test (void)
+{
+ void *h = xdlopen ("tst-tlsdesc-pac-mod.so", RTLD_LAZY);
+ void (*func)(void) = xdlsym (h, "bar");
+ func ();
+
+ return 0;
+}
+
+#include <support/test-driver.c>

View File

@ -0,0 +1,76 @@
commit 691edbdf7727466ba87e27a8eeae1c3bc5824ef5
Author: Yury Khrustalev <yury.khrustalev@arm.com>
Date: Thu May 8 13:53:38 2025 +0100
aarch64: fix unwinding in longjmp
Previously, longjmp() on aarch64 was using CFI directives around the
call to __libc_arm_za_disable() after CFA was redefined at the start
of longjmp(). This may result in unwinding issues. Move the call and
surrounding CFI directives to the beginning of longjmp().
Suggested-by: Wilco Dijkstra <wilco.dijkstra@arm.com>
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 452ba0da6d788ce8..30b36cb25d921795 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -24,28 +24,6 @@
/* __longjmp(jmpbuf, val) */
ENTRY (__longjmp)
- cfi_def_cfa(x0, 0)
- cfi_offset(x19, JB_X19<<3)
- cfi_offset(x20, JB_X20<<3)
- cfi_offset(x21, JB_X21<<3)
- cfi_offset(x22, JB_X22<<3)
- cfi_offset(x23, JB_X23<<3)
- cfi_offset(x24, JB_X24<<3)
- cfi_offset(x25, JB_X25<<3)
- cfi_offset(x26, JB_X26<<3)
- cfi_offset(x27, JB_X27<<3)
- cfi_offset(x28, JB_X28<<3)
- cfi_offset(x29, JB_X29<<3)
- cfi_offset(x30, JB_LR<<3)
-
- cfi_offset( d8, JB_D8<<3)
- cfi_offset( d9, JB_D9<<3)
- cfi_offset(d10, JB_D10<<3)
- cfi_offset(d11, JB_D11<<3)
- cfi_offset(d12, JB_D12<<3)
- cfi_offset(d13, JB_D13<<3)
- cfi_offset(d14, JB_D14<<3)
- cfi_offset(d15, JB_D15<<3)
#if IS_IN(libc)
/* Disable ZA state of SME in libc.a and libc.so, but not in ld.so. */
@@ -69,6 +47,29 @@ ENTRY (__longjmp)
# endif
#endif
+ cfi_def_cfa (x0, 0)
+ cfi_offset (x19, JB_X19<<3)
+ cfi_offset (x20, JB_X20<<3)
+ cfi_offset (x21, JB_X21<<3)
+ cfi_offset (x22, JB_X22<<3)
+ cfi_offset (x23, JB_X23<<3)
+ cfi_offset (x24, JB_X24<<3)
+ cfi_offset (x25, JB_X25<<3)
+ cfi_offset (x26, JB_X26<<3)
+ cfi_offset (x27, JB_X27<<3)
+ cfi_offset (x28, JB_X28<<3)
+ cfi_offset (x29, JB_X29<<3)
+ cfi_offset (x30, JB_LR<<3)
+
+ cfi_offset ( d8, JB_D8<<3)
+ cfi_offset ( d9, JB_D9<<3)
+ cfi_offset (d10, JB_D10<<3)
+ cfi_offset (d11, JB_D11<<3)
+ cfi_offset (d12, JB_D12<<3)
+ cfi_offset (d13, JB_D13<<3)
+ cfi_offset (d14, JB_D14<<3)
+ cfi_offset (d15, JB_D15<<3)
+
ldp x19, x20, [x0, #JB_X19<<3]
ldp x21, x22, [x0, #JB_X21<<3]
ldp x23, x24, [x0, #JB_X23<<3]

View File

@ -0,0 +1,29 @@
commit aa18367c1169700f610565eba8acf3e08429fcf5
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Thu May 29 15:08:15 2025 +0000
AArch64: Improve enabling of SVE for libmvec
When using a -mcpu option in CFLAGS, GCC can report errors when building libmvec.
Fix this by overriding both -mcpu and -march with a generic variant with SVE added.
Also use a tune for a modern SVE core.
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index be8541f6496d6688..aa547b21df5f41d9 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -49,8 +49,11 @@ libmvec-support = $(addsuffix f_advsimd,$(float-advsimd-funcs)) \
v_powf_data
endif
-sve-cflags = -march=armv8-a+sve
+# Enable SVE for building libmvec. Since CFLAGS may contain a -mcpu or -march,
+# add a generic -mcpu and -march with SVE enabled. Also use a tune for a modern
+# SVE core.
+sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v2
ifeq ($(build-mathvec),yes)
bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \

View File

@ -0,0 +1,24 @@
commit 09795c5612c630db605886dfd55dbf56f381d128
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Fri Jun 6 13:15:30 2025 +0000
AArch64: Fix build error with GCC 12.1/12.2
Early versions of GCC 12 didn't support -mtune=neoverse-v2, so use
-mtune=neoverse-v1 instead.
Reported-by: Yury Khrustalev <yury.khrustalev@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index aa547b21df5f41d9..c8a6fb4628d13aec 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -53,7 +53,7 @@ endif
# add a generic -mcpu and -march with SVE enabled. Also use a tune for a modern
# SVE core.
-sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v2
+sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1
ifeq ($(build-mathvec),yes)
bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \

188
glibc-RHEL-118273-38.patch Normal file
View File

@ -0,0 +1,188 @@
commit 6849c5b791edd216f2ec3fdbe4d138bc69b9b333
Author: Luna Lamb <luna.lamb@arm.com>
Date: Wed Jun 18 16:12:19 2025 +0000
AArch64: Improve codegen SVE log1p helper
Improve codegen by packing coefficients.
4% and 2% improvement in throughput microbenchmark on Neoverse V1, for acosh
and atanh respectively.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c
index 3e4faaa5ca686c18..78ebcffbb5737641 100644
--- a/sysdeps/aarch64/fpu/acosh_sve.c
+++ b/sysdeps/aarch64/fpu/acosh_sve.c
@@ -30,10 +30,10 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
}
/* SVE approximation for double-precision acosh, based on log1p.
- The largest observed error is 3.19 ULP in the region where the
+ The largest observed error is 3.14 ULP in the region where the
argument to log1p falls in the k=0 interval, i.e. x close to 1:
- SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
- want 0x1.ed23399f51373p-2. */
+ SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2
+ want 0x1.ef0cee7c33ce4p-2. */
svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
{
/* (ix - One) >= (BigBound - One). */
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
index 7a52728d70f6d226..a4803e5c1305379e 100644
--- a/sysdeps/aarch64/fpu/atanh_sve.c
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -30,7 +30,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
}
/* SVE approximation for double-precision atanh, based on log1p.
- The greatest observed error is 2.81 ULP:
+ The greatest observed error is 3.3 ULP:
_ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
want 0x1.ffd8ff31b501cp-6. */
svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
@@ -42,7 +42,6 @@ svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
/* It is special if iax >= 1. */
-// svbool_t special = svcmpge (pg, iax, One);
svbool_t special = svacge (pg, x, 1.0);
/* Computation is performed based on the following sequence of equality:
diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h
index da019674f94dbac7..a9ecd75d19e95d39 100644
--- a/sysdeps/aarch64/fpu/sv_log1p_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h
@@ -21,11 +21,12 @@
#define AARCH64_FPU_SV_LOG1P_INLINE_H
#include "sv_math.h"
-#include "poly_sve_f64.h"
static const struct sv_log1p_data
{
- double poly[19], ln2[2];
+ double c0, c2, c4, c6, c8, c10, c12, c14, c16;
+ double c1, c3, c5, c7, c9, c11, c13, c15, c17, c18;
+ double ln2_lo, ln2_hi;
uint64_t hf_rt2_top;
uint64_t one_m_hf_rt2_top;
uint32_t bottom_mask;
@@ -33,15 +34,30 @@ static const struct sv_log1p_data
} sv_log1p_data = {
/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].
*/
- .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
- 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
- -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
- 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
- -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
- 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
- -0x1.cfa7385bdb37ep-6 },
- .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 },
+ .c0 = -0x1.ffffffffffffbp-2,
+ .c1 = 0x1.55555555551a9p-2,
+ .c2 = -0x1.00000000008e3p-2,
+ .c3 = 0x1.9999999a32797p-3,
+ .c4 = -0x1.555555552fecfp-3,
+ .c5 = 0x1.249248e071e5ap-3,
+ .c6 = -0x1.ffffff8bf8482p-4,
+ .c7 = 0x1.c71c8f07da57ap-4,
+ .c8 = -0x1.9999ca4ccb617p-4,
+ .c9 = 0x1.7459ad2e1dfa3p-4,
+ .c10 = -0x1.554d2680a3ff2p-4,
+ .c11 = 0x1.3b4c54d487455p-4,
+ .c12 = -0x1.2548a9ffe80e6p-4,
+ .c13 = 0x1.0f389a24b2e07p-4,
+ .c14 = -0x1.eee4db15db335p-5,
+ .c15 = 0x1.e95b494d4a5ddp-5,
+ .c16 = -0x1.15fdf07cb7c73p-4,
+ .c17 = 0x1.0310b70800fcfp-4,
+ .c18 = -0x1.cfa7385bdb37ep-6,
+ .ln2_lo = 0x1.62e42fefa3800p-1,
+ .ln2_hi = 0x1.ef35793c76730p-45,
+ /* top32(asuint64(sqrt(2)/2)) << 32. */
.hf_rt2_top = 0x3fe6a09e00000000,
+ /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */
.one_m_hf_rt2_top = 0x00095f6200000000,
.bottom_mask = 0xffffffff,
.one_top = 0x3ff
@@ -51,14 +67,14 @@ static inline svfloat64_t
sv_log1p_inline (svfloat64_t x, const svbool_t pg)
{
/* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
- differs from v_log1p_2u5.c by:
+ differs from advsimd/log1p.c by:
- No special-case handling - this should be dealt with by the caller.
- Pairwise Horner polynomial evaluation for improved accuracy.
- Optionally simulate the shortcut for k=0, used in the scalar routine,
using svsel, for improved accuracy when the argument to log1p is close
to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1
in the source of the caller before including this file.
- See sv_log1p_2u1.c for details of the algorithm. */
+ See sve/log1p.c for details of the algorithm. */
const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data);
svfloat64_t m = svadd_x (pg, x, 1);
svuint64_t mi = svreinterpret_u64 (m);
@@ -79,7 +95,7 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
svfloat64_t cm;
#ifndef WANT_SV_LOG1P_K0_SHORTCUT
-#error \
+#error \
"Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
#elif WANT_SV_LOG1P_K0_SHORTCUT
/* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
@@ -96,14 +112,46 @@ sv_log1p_inline (svfloat64_t x, const svbool_t pg)
#endif
/* Approximate log1p(f) on the reduced input using a polynomial. */
- svfloat64_t f2 = svmul_x (pg, f, f);
- svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly);
+ svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f),
+ f4 = svmul_x (svptrue_b64 (), f2, f2),
+ f8 = svmul_x (svptrue_b64 (), f4, f4),
+ f16 = svmul_x (svptrue_b64 (), f8, f8);
+
+ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+ svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5);
+ svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9);
+ svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13);
+ svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17);
+
+ /* Order-18 Estrin scheme. */
+ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0);
+ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1);
+ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0);
+ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1);
+
+ svfloat64_t p03 = svmla_x (pg, p01, f2, p23);
+ svfloat64_t p47 = svmla_x (pg, p45, f2, p67);
+ svfloat64_t p07 = svmla_x (pg, p03, f4, p47);
+
+ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0);
+ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1);
+ svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0);
+ svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1);
+
+ svfloat64_t p811 = svmla_x (pg, p89, f2, p1011);
+ svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415);
+ svfloat64_t p815 = svmla_x (pg, p811, f4, p1215);
+
+ svfloat64_t p015 = svmla_x (pg, p07, f8, p815);
+ svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0);
+ svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1);
+ svfloat64_t p = svmla_x (pg, p015, f16, p1618);
/* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */
- svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]);
- svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]);
+ svfloat64_t ln2_lo_hi = svld1rq (svptrue_b64 (), &d->ln2_lo);
+ svfloat64_t ylo = svmla_lane (cm, k, ln2_lo_hi, 0);
+ svfloat64_t yhi = svmla_lane (f, k, ln2_lo_hi, 1);
- return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+ return svmad_x (pg, p, f2, svadd_x (pg, ylo, yhi));
}
-
#endif

583
glibc-RHEL-118273-39.patch Normal file
View File

@ -0,0 +1,583 @@
commit dee22d2a81ab59afc165fb6dcb45d723f13582a0
Author: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Wed Jun 18 16:19:22 2025 +0000
AArch64: Optimise SVE FP64 Hyperbolics
Rework SVE FP64 hyperbolics to use the SVE FEXPA
instruction.
Also update the special case handling for large
inputs to be entirely vectorised.
Performance improvements on Neoverse V1:
cosh_sve: 19% for |x| < 709, 5x otherwise
sinh_sve: 24% for |x| < 709, 5.9x otherwise
tanh_sve: 12% for |x| < 19, 9x otherwise
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
index e375dd8a3407feb2..3561893ae614e2ea 100644
--- a/sysdeps/aarch64/fpu/cosh_sve.c
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -21,71 +21,99 @@
static const struct data
{
- float64_t poly[3];
- float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
+ double c0, c2;
+ double c1, c3;
+ float64_t inv_ln2, ln2_hi, ln2_lo, shift;
uint64_t special_bound;
} data = {
- .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
- 0x1.5555576a59599p-5, },
-
- .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */
- /* -ln2/N. */
- .ln2_hi = -0x1.62e42fefa39efp-9,
- .ln2_lo = -0x1.abc9e3b39803f3p-64,
- .shift = 0x1.8p+52,
- .thres = 704.0,
-
- /* 0x1.6p9, above which exp overflows. */
- .special_bound = 0x4086000000000000,
+ /* Generated using Remez, in [-log(2)/128, log(2)/128]. */
+ .c0 = 0x1.fffffffffdbcdp-2,
+ .c1 = 0x1.555555555444cp-3,
+ .c2 = 0x1.555573c6a9f7dp-5,
+ .c3 = 0x1.1111266d28935p-7,
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ /* 1/ln2. */
+ .inv_ln2 = 0x1.71547652b82fep+0,
+ .shift = 0x1.800000000ff80p+46, /* 1.5*2^46+1022. */
+
+ /* asuint(ln(2^(1024 - 1/128))), the value above which exp overflows. */
+ .special_bound = 0x40862e37e7d8ba72,
};
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg, svfloat64_t t, svbool_t special)
-{
- svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
- svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
- svfloat64_t y = svadd_x (pg, half_t, half_over_t);
- return sv_call_f64 (cosh, x, y, special);
-}
-
-/* Helper for approximating exp(x). Copied from sv_exp_tail, with no
- special-case handling or tail. */
+/* Helper for approximating exp(x)/2.
+ Functionally identical to FEXPA exp(x), but an adjustment in
+ the shift value which leads to a reduction in the exponent of scale by 1,
+ thus halving the result at no cost. */
static inline svfloat64_t
-exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
{
/* Calculate exp(x). */
svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svuint64_t u = svreinterpret_u64 (z);
svfloat64_t n = svsub_x (pg, z, d->shift);
- svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi);
- r = svmla_x (pg, r, n, d->ln2_lo);
+ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
- svuint64_t u = svreinterpret_u64 (z);
- svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
- svuint64_t i = svand_x (svptrue_b64 (), u, 0xff);
+ svfloat64_t r = x;
+ r = svmls_lane (r, n, ln2, 0);
+ r = svmls_lane (r, n, ln2, 1);
- svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
- y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
- y = svmla_x (pg, sv_f64 (1.0), r, y);
- y = svmul_x (svptrue_b64 (), r, y);
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
+ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
+ svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
+ svfloat64_t p = svmla_x (pg, r, p04, r2);
- /* s = 2^(n/N). */
- u = svld1_gather_index (pg, __v_exp_tail_data, i);
- svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e));
+ svfloat64_t scale = svexpa (u);
- return svmla_x (pg, s, s, y);
+ return svmla_x (pg, scale, scale, p);
+}
+
+/* Vectorised special case to handle values past where exp_inline overflows.
+ Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double
+ the valid range of inputs, and returns inf for anything past that. */
+static svfloat64_t NOINLINE
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax, svfloat64_t t,
+ const struct data *d)
+{
+ /* Finish fast path to compute values for non-special cases. */
+ svfloat64_t inv_twoexp = svdivr_x (pg, t, 0.25);
+ svfloat64_t y = svadd_x (pg, t, inv_twoexp);
+
+ /* Halves input value, and then check if any cases
+ are still going to overflow. */
+ ax = svmul_x (special, ax, 0.5);
+ svbool_t is_safe
+ = svcmplt (special, svreinterpret_u64 (ax), d->special_bound);
+
+ /* Computes exp(x/2), and sets any overflowing lanes to inf. */
+ svfloat64_t half_exp = exp_over_two_inline (special, ax, d);
+ half_exp = svsel (is_safe, half_exp, sv_f64 (INFINITY));
+
+ /* Construct special case cosh(x) = (exp(x/2)^2)/2. */
+ svfloat64_t exp = svmul_x (svptrue_b64 (), half_exp, 2);
+ svfloat64_t special_y = svmul_x (special, exp, half_exp);
+
+ /* Select correct return values for special and non-special cases. */
+ special_y = svsel (special, special_y, y);
+
+ /* Ensure an input of nan is correctly propagated. */
+ svbool_t is_nan
+ = svcmpgt (special, svreinterpret_u64 (ax), sv_u64 (0x7ff0000000000000));
+ return svsel (is_nan, ax, svsel (special, special_y, y));
}
/* Approximation for SVE double-precision cosh(x) using exp_inline.
cosh(x) = (exp(x) + exp(-x)) / 2.
- The greatest observed error is in the scalar fall-back region, so is the
- same as the scalar routine, 1.93 ULP:
- _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021
- want 0x1.fd774e958236fp+1021.
-
- The greatest observed error in the non-special region is 1.54 ULP:
- _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8
- want 0x1.f5e2bb8d5c991p+8. */
+ The greatest observed error in special case region is 2.66 + 0.5 ULP:
+ _ZGVsMxv_cosh (0x1.633b532ffbc1ap+9) got 0x1.f9b2d3d22399ep+1023
+ want 0x1.f9b2d3d22399bp+1023
+
+ The greatest observed error in the non-special region is 1.01 + 0.5 ULP:
+ _ZGVsMxv_cosh (0x1.998ecbb3c1f81p+1) got 0x1.890b225657f84p+3
+ want 0x1.890b225657f82p+3. */
svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -94,14 +122,13 @@ svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound);
/* Up to the point that exp overflows, we can use it to calculate cosh by
- exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
- svfloat64_t t = exp_inline (ax, pg, d);
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ svfloat64_t half_exp = exp_over_two_inline (pg, ax, d);
- /* Fall back to scalar for any special cases. */
+ /* Falls back to entirely standalone vectorized special case. */
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, pg, t, special);
+ return special_case (pg, special, ax, half_exp, d);
- svfloat64_t half_t = svmul_x (svptrue_b64 (), t, 0.5);
- svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
- return svadd_x (pg, half_t, half_over_t);
+ svfloat64_t inv_twoexp = svdivr_x (pg, half_exp, 0.25);
+ return svadd_x (pg, half_exp, inv_twoexp);
}
diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
index df5f6c8c06e5b173..ac7b306018bda613 100644
--- a/sysdeps/aarch64/fpu/sinh_sve.c
+++ b/sysdeps/aarch64/fpu/sinh_sve.c
@@ -18,90 +18,153 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
static const struct data
{
- float64_t poly[11];
- float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift;
uint64_t halff;
- int64_t onef;
- uint64_t large_bound;
+ double c2, c4;
+ double inv_ln2;
+ double ln2_hi, ln2_lo;
+ double c0, c1, c3;
+ double shift, special_bound, bound;
+ uint64_t expm1_data[20];
} data = {
- /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
- .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
- 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
- 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
- 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
- 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
- .inv_ln2 = 0x1.71547652b82fep0,
- .m_ln2_hi = -0x1.62e42fefa39efp-1,
- .m_ln2_lo = -0x1.abc9e3b39803fp-56,
- .shift = 0x1.8p52,
-
+ /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. */
+ .expm1_data = {
+ 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+ 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+ 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+ 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+ 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+ },
+
+ /* Generated using Remez, in [-log(2)/128, log(2)/128]. */
+ .c0 = 0x1p-1,
+ .c1 = 0x1.55555555548f9p-3,
+ .c2 = 0x1.5555555554c22p-5,
+ .c3 = 0x1.111123aaa2fb2p-7,
+ .c4 = 0x1.6c16d77d98e5bp-10,
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ .inv_ln2 = 0x1.71547652b82fep+0,
+ .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */
.halff = 0x3fe0000000000000,
- .onef = 0x3ff0000000000000,
- /* 2^9. expm1 helper overflows for large input. */
- .large_bound = 0x4080000000000000,
+ .special_bound = 0x1.62e37e7d8ba72p+9, /* ln(2^(1024 - 1/128)). */
+ .bound = 0x1.a56ef8ec924ccp-3 /* 19*ln2/64. */
};
+/* A specialised FEXPA expm1 that is only valid for positive inputs and
+ has no special cases. Based off the full FEXPA expm1 implemented for
+ _ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP. */
static inline svfloat64_t
-expm1_inline (svfloat64_t x, svbool_t pg)
+expm1_inline (svbool_t pg, svfloat64_t x)
{
const struct data *d = ptr_barrier (&data);
- /* Reduce argument:
- exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
- where i = round(x / ln2)
- and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
- svfloat64_t j
- = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
- svint64_t i = svcvt_s64_x (pg, j);
- svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi);
- f = svmla_x (pg, f, j, d->m_ln2_lo);
- /* Approximate expm1(f) using polynomial. */
- svfloat64_t f2 = svmul_x (pg, f, f);
- svfloat64_t f4 = svmul_x (pg, f2, f2);
- svfloat64_t f8 = svmul_x (pg, f4, f4);
- svfloat64_t p
- = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
- /* t = 2^i. */
- svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
- /* expm1(x) ~= p * t + (t - 1). */
- return svmla_x (pg, svsub_x (pg, t, 1.0), p, t);
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svuint64_t u = svreinterpret_u64 (z);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+ svfloat64_t r = x;
+ r = svmls_lane (r, n, ln2, 0);
+ r = svmls_lane (r, n, ln2, 1);
+
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+
+ svfloat64_t p;
+ svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+ svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+ p = svmad_x (pg, c34, r2, c12);
+ p = svmad_x (pg, p, r, sv_f64 (d->c0));
+ p = svmad_x (pg, p, r2, r);
+
+ svfloat64_t scale = svexpa (u);
+
+ /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+ However, for values of scale close to 1, scale-1 causes large ULP errors
+ due to cancellation.
+
+ This can be circumvented by using a small lookup for scale-1
+ when our input is below a certain bound, otherwise we can use FEXPA. */
+ svbool_t is_small = svaclt (pg, x, d->bound);
+
+ /* Index via the input of FEXPA, but we only care about the lower 5 bits. */
+ svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+ /* Compute scale - 1 from FEXPA, and lookup values where this fails. */
+ svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+ svuint64_t scalem1_lookup
+ = svld1_gather_index (is_small, d->expm1_data, base_idx);
+
+ /* Select the appropriate scale - 1 value based on x. */
+ svfloat64_t scalem1
+ = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+
+ /* return expm1 = scale - 1 + (scale * poly). */
+ return svmla_x (pg, scalem1, scale, p);
}
+/* Vectorised special case to handle values past where exp_inline overflows.
+ Halves the input value and uses the identity exp(x) = exp(x/2)^2 to double
+ the valid range of inputs, and returns inf for anything past that. */
static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svbool_t pg)
+special_case (svbool_t pg, svbool_t special, svfloat64_t ax,
+ svfloat64_t halfsign, const struct data *d)
{
- return sv_call_f64 (sinh, x, x, pg);
+ /* Halves input value, and then check if any cases
+ are still going to overflow. */
+ ax = svmul_x (special, ax, 0.5);
+ svbool_t is_safe = svaclt (special, ax, d->special_bound);
+
+ svfloat64_t t = expm1_inline (pg, ax);
+
+ /* Finish fast path to compute values for non-special cases. */
+ svfloat64_t y = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+ y = svmul_x (pg, y, halfsign);
+
+ /* Computes special lane, and set remaining overflow lanes to inf. */
+ svfloat64_t half_special_y = svmul_x (svptrue_b64 (), t, halfsign);
+ svfloat64_t special_y = svmul_x (svptrue_b64 (), half_special_y, t);
+
+ svuint64_t signed_inf
+ = svorr_x (svptrue_b64 (), svreinterpret_u64 (halfsign),
+ sv_u64 (0x7ff0000000000000));
+ special_y = svsel (is_safe, special_y, svreinterpret_f64 (signed_inf));
+
+ /* Join resulting vectors together and return. */
+ return svsel (special, special_y, y);
}
-/* Approximation for SVE double-precision sinh(x) using expm1.
- sinh(x) = (exp(x) - exp(-x)) / 2.
- The greatest observed error is 2.57 ULP:
- _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2
- want 0x1.ab929fc64bd63p-2. */
+/* Approximation for SVE double-precision sinh(x) using FEXPA expm1.
+ Uses sinh(x) = (e^2x - 1) / (2e^x), rewritten for accuracy.
+ The greatest observed error in the non-special region is 2.63 + 0.5 ULP:
+ _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2
+ want 0x1.c3587faf97b09p-2
+
+ The greatest observed error in the special region is 2.65 + 0.5 ULP:
+ _ZGVsMxv_sinh (0x1.633ce847dab1ap+9) got 0x1.fffd30eea0066p+1023
+ want 0x1.fffd30eea0063p+1023. */
svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
+ svbool_t special = svacge (pg, x, d->special_bound);
svfloat64_t ax = svabs_x (pg, x);
svuint64_t sign
= sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff));
- svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound);
-
/* Fall back to scalar variant for all lanes if any are special. */
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, pg);
+ return special_case (pg, special, ax, halfsign, d);
/* Up to the point that expm1 overflows, we can use it to calculate sinh
using a slight rearrangement of the definition of sinh. This allows us to
retain acceptable accuracy for very small inputs. */
- svfloat64_t t = expm1_inline (ax, pg);
+ svfloat64_t t = expm1_inline (pg, ax);
t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
return svmul_x (pg, t, halfsign);
}
diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
index d25e011cea305094..805669845d09e098 100644
--- a/sysdeps/aarch64/fpu/tanh_sve.c
+++ b/sysdeps/aarch64/fpu/tanh_sve.c
@@ -18,83 +18,117 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
static const struct data
{
- float64_t poly[11];
- float64_t inv_ln2, ln2_hi, ln2_lo, shift;
- uint64_t thresh, tiny_bound;
+ double ln2_hi, ln2_lo;
+ double c2, c4;
+ double c0, c1, c3;
+ double two_over_ln2, shift;
+ uint64_t tiny_bound;
+ double large_bound, fexpa_bound;
+ uint64_t e2xm1_data[20];
} data = {
- /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
- .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
- 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
- 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
- 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
- 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
- .inv_ln2 = 0x1.71547652b82fep0,
- .ln2_hi = -0x1.62e42fefa39efp-1,
- .ln2_lo = -0x1.abc9e3b39803fp-56,
- .shift = 0x1.8p52,
-
+ /* Generated using Remez, in [-log(2)/128, log(2)/128]. */
+ .c0 = 0x1p-1,
+ .c1 = 0x1.55555555548f9p-3,
+ .c2 = 0x1.5555555554c22p-5,
+ .c3 = 0x1.111123aaa2fb2p-7,
+ .c4 = 0x1.6c16d77d98e5bp-10,
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ .two_over_ln2 = 0x1.71547652b82fep+1,
+ .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */
.tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */
- /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
- .thresh = 0x01f241bf835f9d5f,
+ .large_bound = 0x1.30fc1931f09cap+4, /* arctanh(1 - 2^-54). */
+ .fexpa_bound = 0x1.a56ef8ec924ccp-4, /* 19/64 * ln2/2. */
+ /* Table lookup of 2^(i/64) - 1, for values of i from 0..19. */
+ .e2xm1_data = {
+ 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+ 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+ 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+ 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+ 0x3fc837f0518db8a9, 0x3fc9e0459320b7fa, 0x3fcb8d39b9d54e55, 0x3fcd3ed9a72cffb7,
+ },
};
+/* An expm1 inspired, FEXPA based helper function that returns an
+ accurate estimate for e^2x - 1. With no special case or support for
+ negative inputs of x. */
static inline svfloat64_t
-expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
-{
- /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
- the scalar variant of tanh. */
-
- /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
- svfloat64_t j
- = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
- svint64_t i = svcvt_s64_x (pg, j);
- svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi);
- f = svmla_x (pg, f, j, d->ln2_lo);
-
- /* Approximate expm1(f) using polynomial. */
- svfloat64_t f2 = svmul_x (pg, f, f);
- svfloat64_t f4 = svmul_x (pg, f2, f2);
- svfloat64_t p = svmla_x (
- pg, f, f2,
- sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly));
-
- /* t = 2 ^ i. */
- svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
- /* expm1(x) = p * t + (t - 1). */
- return svmla_x (pg, svsub_x (pg, t, 1), p, t);
-}
-
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
{
- return sv_call_f64 (tanh, x, y, special);
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2);
+ svuint64_t u = svreinterpret_u64 (z);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+
+ /* r = x - n * ln2/2, r is in [-ln2/(2N), ln2/(2N)]. */
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t r = svadd_x (pg, x, x);
+ r = svmls_lane (r, n, ln2, 0);
+ r = svmls_lane (r, n, ln2, 1);
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+ svfloat64_t p;
+ svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+ svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+ p = svmad_x (pg, c34, r2, c12);
+ p = svmad_x (pg, p, r, sv_f64 (d->c0));
+ p = svmad_x (pg, p, r2, r);
+
+ svfloat64_t scale = svexpa (u);
+
+ /* We want to construct e2xm1(x) = (scale - 1) + scale * poly.
+ However, for values of scale close to 1, scale-1 causes large ULP errors
+ due to cancellation.
+
+ This can be circumvented by using a small lookup for scale-1
+ when our input is below a certain bound, otherwise we can use FEXPA. */
+ svbool_t is_small = svaclt (pg, x, d->fexpa_bound);
+
+ /* Index via the input of FEXPA, but we only care about the lower 5 bits. */
+ svuint64_t base_idx = svand_x (pg, u, 0x1f);
+
+ /* Compute scale - 1 from FEXPA, and lookup values where this fails. */
+ svfloat64_t scalem1_estimate = svsub_x (pg, scale, sv_f64 (1.0));
+ svuint64_t scalem1_lookup
+ = svld1_gather_index (is_small, d->e2xm1_data, base_idx);
+
+ /* Select the appropriate scale - 1 value based on x. */
+ svfloat64_t scalem1
+ = svsel (is_small, svreinterpret_f64 (scalem1_lookup), scalem1_estimate);
+ return svmla_x (pg, scalem1, scale, p);
}
-/* SVE approximation for double-precision tanh(x), using a simplified
- version of expm1. The greatest observed error is 2.77 ULP:
- _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
- want -0x1.bd6a21a163624p-3. */
+/* SVE approximation for double-precision tanh(x), using a modified version of
+ FEXPA expm1 to calculate e^2x - 1.
+ The greatest observed error is 2.79 + 0.5 ULP:
+ _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9
+ want 0x1.fff7be486cae9p-9. */
svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x));
+ svbool_t large = svacge (pg, x, d->large_bound);
- /* Trigger special-cases for tiny, boring and infinity/NaN. */
- svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh);
+ /* We can use tanh(x) = (e^2x - 1) / (e^2x + 1) to approximate tanh.
+ As an additional optimisation, we can ensure more accurate values of e^x
+ by only using positive inputs. So we calculate tanh(|x|), and restore the
+ sign of the input before returning. */
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t sign_bit
+ = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
- svfloat64_t u = svadd_x (pg, x, x);
+ svfloat64_t p = e2xm1_inline (pg, ax, d);
+ svfloat64_t q = svadd_x (pg, p, 2);
- /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
- svfloat64_t q = expm1_inline (u, pg, d);
- svfloat64_t qp2 = svadd_x (pg, q, 2);
+ /* For sufficiently high inputs, the result of tanh(|x|) is 1 when correctly
+ rounded, at this point we can return 1 directly, with sign correction.
+ This will also act as a guard against our approximation overflowing. */
+ svfloat64_t y = svsel (large, sv_f64 (1.0), svdiv_x (pg, p, q));
- if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, svdiv_x (pg, q, qp2), special);
- return svdiv_x (pg, q, qp2);
+ return svreinterpret_f64 (svorr_x (pg, sign_bit, svreinterpret_u64 (y)));
}

673
glibc-RHEL-118273-4.patch Normal file
View File

@ -0,0 +1,673 @@
commit 81406ea3c5b5ad19e307302c13dd642785b47948
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Feb 20 16:59:41 2024 +0000
aarch64/fpu: Add vector variants of asinh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 2e5bbb5a07f4c9b0..d474f2969dd05c26 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -1,6 +1,7 @@
libmvec-supported-funcs = acos \
acosh \
asin \
+ asinh \
atan \
atan2 \
cos \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 60e1cdeacec3f77e..08ea15efaec959fb 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -84,6 +84,11 @@ libmvec {
_ZGVnN4v_acoshf;
_ZGVsMxv_acosh;
_ZGVsMxv_acoshf;
+ _ZGVnN2v_asinh;
+ _ZGVnN2v_asinhf;
+ _ZGVnN4v_asinhf;
+ _ZGVsMxv_asinh;
+ _ZGVsMxv_asinhf;
_ZGVnN2v_cosh;
_ZGVnN2v_coshf;
_ZGVnN4v_coshf;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 22fec4de77395e60..1e80721c9f73ba12 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -20,6 +20,7 @@
libmvec_hidden_proto (V_NAME_F1(acos));
libmvec_hidden_proto (V_NAME_F1(acosh));
libmvec_hidden_proto (V_NAME_F1(asin));
+libmvec_hidden_proto (V_NAME_F1(asinh));
libmvec_hidden_proto (V_NAME_F1(atan));
libmvec_hidden_proto (V_NAME_F1(cos));
libmvec_hidden_proto (V_NAME_F1(cosh));
diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c
new file mode 100644
index 0000000000000000..544a52f6515d3201
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinh_advsimd.c
@@ -0,0 +1,171 @@
+/* Double-precision vector (Advanced SIMD) asinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+#define A(i) v_f64 (__v_log_data.poly[i])
+#define N (1 << V_LOG_TABLE_BITS)
+
+const static struct data
+{
+ float64x2_t poly[18];
+ uint64x2_t off, huge_bound, abs_mask;
+ float64x2_t ln2, tiny_bound;
+} data = {
+ .off = V2 (0x3fe6900900000000),
+ .ln2 = V2 (0x1.62e42fefa39efp-1),
+ .huge_bound = V2 (0x5fe0000000000000),
+ .tiny_bound = V2 (0x1p-26),
+ .abs_mask = V2 (0x7fffffffffffffff),
+ /* Even terms of polynomial s.t. asinh(x) is approximated by
+ asinh(x) ~= x + x^3 * (C0 + C1 * x^2 + C2 * x^4 + C3 * x^6 + ...).
+ Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). */
+ .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4),
+ V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6),
+ V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6),
+ V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7),
+ V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7),
+ V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8),
+ V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9),
+ V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12),
+ V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) },
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (asinh, x, y, special);
+}
+
+struct entry
+{
+ float64x2_t invc;
+ float64x2_t logc;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ float64x2_t e0 = vld1q_f64 (
+ &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
+ float64x2_t e1 = vld1q_f64 (
+ &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
+ return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) };
+}
+
+static inline float64x2_t
+log_inline (float64x2_t x, const struct data *d)
+{
+ /* Double-precision vector log, copied from ordinary vector log with some
+ cosmetic modification and special-cases removed. */
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t tmp = vsubq_u64 (ix, d->off);
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+ uint64x2_t iz
+ = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52)));
+ float64x2_t z = vreinterpretq_f64_u64 (iz);
+ struct entry e = lookup (tmp);
+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+ float64x2_t kd = vcvtq_f64_s64 (k);
+ float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t y = vfmaq_f64 (A (2), A (3), r);
+ float64x2_t p = vfmaq_f64 (A (0), A (1), r);
+ y = vfmaq_f64 (y, A (4), r2);
+ y = vfmaq_f64 (p, y, r2);
+ y = vfmaq_f64 (hi, y, r2);
+ return y;
+}
+
+/* Double-precision implementation of vector asinh(x).
+ asinh is very sensitive around 1, so it is impractical to devise a single
+ low-cost algorithm which is sufficiently accurate on a wide range of input.
+ Instead we use two different algorithms:
+ asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1
+ = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
+ where log(x) is an optimized log approximation, and P(x) is a polynomial
+ shared with the scalar routine. The greatest observed error is 3.29 ULP, in
+ |x| >= 1:
+ __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
+ want 0x1.ffffcfd0e2352p-1. */
+VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t iax = vreinterpretq_u64_f64 (ax);
+
+ uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1));
+ uint64x2_t special = vcgeq_u64 (iax, d->huge_bound);
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound);
+ special = vorrq_u64 (special, tiny);
+#endif
+
+ /* Option 1: |x| >= 1.
+ Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)).
+ If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
+ overflow, by setting special lanes to 1. These will be fixed later. */
+ float64x2_t option_1 = v_f64 (0);
+ if (__glibc_likely (v_any_u64 (gt1)))
+ {
+#if WANT_SIMD_EXCEPT
+ float64x2_t xm = v_zerofy_f64 (ax, special);
+#else
+ float64x2_t xm = ax;
+#endif
+ option_1 = log_inline (
+ vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d);
+ }
+
+ /* Option 2: |x| < 1.
+ Compute asinh(x) using a polynomial.
+ If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
+ overflow, and tiny lanes, which will underflow, by setting them to 0. They
+ will be fixed later, either by selecting x or falling back to the scalar
+ special-case. The largest observed error in this region is 1.47 ULPs:
+ __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1. */
+ float64x2_t option_2 = v_f64 (0);
+ if (__glibc_likely (v_any_u64 (vceqzq_u64 (gt1))))
+ {
+#if WANT_SIMD_EXCEPT
+ ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1));
+#endif
+ float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2),
+ z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2),
+ z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8);
+ float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly);
+ option_2 = vfmaq_f64 (ax, p, x3);
+#if WANT_SIMD_EXCEPT
+ option_2 = vbslq_f64 (tiny, x, option_2);
+#endif
+ }
+
+ /* Choose the right option for each lane. */
+ float64x2_t y = vbslq_f64 (gt1, option_1, option_2);
+ /* Copy sign. */
+ y = vbslq_f64 (d->abs_mask, y, x);
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, y, special);
+ return y;
+}
diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c
new file mode 100644
index 0000000000000000..28dc5c458750bac4
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinh_sve.c
@@ -0,0 +1,150 @@
+/* Double-precision vector (SVE) asinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+#define SignMask (0x8000000000000000)
+#define One (0x3ff0000000000000)
+#define Thres (0x5fe0000000000000) /* asuint64 (0x1p511). */
+
+static const struct data
+{
+ double poly[18];
+ double ln2, p3, p1, p4, p0, p2;
+ uint64_t n;
+ uint64_t off;
+
+} data = {
+ /* Polynomial generated using Remez on [2^-26, 1]. */
+ .poly
+ = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
+ 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
+ -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
+ 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
+ -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
+ 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18 },
+ .ln2 = 0x1.62e42fefa39efp-1,
+ .p0 = -0x1.ffffffffffff7p-2,
+ .p1 = 0x1.55555555170d4p-2,
+ .p2 = -0x1.0000000399c27p-2,
+ .p3 = 0x1.999b2e90e94cap-3,
+ .p4 = -0x1.554e550bd501ep-3,
+ .n = 1 << V_LOG_TABLE_BITS,
+ .off = 0x3fe6900900000000
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (asinh, x, y, special);
+}
+
+static inline svfloat64_t
+__sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
+{
+ /* Double-precision SVE log, copied from SVE log implementation with some
+ cosmetic modification and special-cases removed. See that file for details
+ of the algorithm used. */
+
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t tmp = svsub_x (pg, ix, d->off);
+ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)),
+ (d->n - 1) << 1);
+ svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
+ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+ svfloat64_t z = svreinterpret_f64 (iz);
+
+ svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+ svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+
+ svfloat64_t ln2_p3 = svld1rq (svptrue_b64 (), &d->ln2);
+ svfloat64_t p1_p4 = svld1rq (svptrue_b64 (), &d->p1);
+
+ svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z);
+ svfloat64_t kd = svcvt_f64_x (pg, k);
+
+ svfloat64_t hi = svmla_lane (svadd_x (pg, logc, r), kd, ln2_p3, 0);
+ svfloat64_t r2 = svmul_x (pg, r, r);
+
+ svfloat64_t y = svmla_lane (sv_f64 (d->p2), r, ln2_p3, 1);
+
+ svfloat64_t p = svmla_lane (sv_f64 (d->p0), r, p1_p4, 0);
+ y = svmla_lane (y, r2, p1_p4, 1);
+ y = svmla_x (pg, p, r2, y);
+ y = svmla_x (pg, hi, r2, y);
+ return y;
+}
+
+/* Double-precision implementation of SVE asinh(x).
+ asinh is very sensitive around 1, so it is impractical to devise a single
+ low-cost algorithm which is sufficiently accurate on a wide range of input.
+ Instead we use two different algorithms:
+ asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1
+ = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
+ where log(x) is an optimized log approximation, and P(x) is a polynomial
+ shared with the scalar routine. The greatest observed error is 2.51 ULP, in
+ |x| >= 1:
+ _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
+ want 0x1.e3181c43b0f39p-1. */
+svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t iax = svbic_x (pg, ix, SignMask);
+ svuint64_t sign = svand_x (pg, ix, SignMask);
+ svfloat64_t ax = svreinterpret_f64 (iax);
+
+ svbool_t ge1 = svcmpge (pg, iax, One);
+ svbool_t special = svcmpge (pg, iax, Thres);
+
+ /* Option 1: |x| >= 1.
+ Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)). */
+ svfloat64_t option_1 = sv_f64 (0);
+ if (__glibc_likely (svptest_any (pg, ge1)))
+ {
+ svfloat64_t x2 = svmul_x (pg, ax, ax);
+ option_1 = __sv_log_inline (
+ svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, x2, 1))), d, pg);
+ }
+
+ /* Option 2: |x| < 1.
+ Compute asinh(x) using a polynomial.
+ The largest observed error in this region is 1.51 ULPs:
+ _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1
+ want 0x1.c1e649ee2681dp-1. */
+ svfloat64_t option_2 = sv_f64 (0);
+ if (__glibc_likely (svptest_any (pg, svnot_z (pg, ge1))))
+ {
+ svfloat64_t x2 = svmul_x (pg, ax, ax);
+ svfloat64_t x4 = svmul_x (pg, x2, x2);
+ svfloat64_t p = sv_pw_horner_17_f64_x (pg, x2, x4, d->poly);
+ option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax));
+ }
+
+ /* Choose the right option for each lane. */
+ svfloat64_t y = svsel (ge1, option_1, option_2);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)),
+ special);
+ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/asinhf_advsimd.c b/sysdeps/aarch64/fpu/asinhf_advsimd.c
new file mode 100644
index 0000000000000000..09fd8a614305563d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinhf_advsimd.c
@@ -0,0 +1,80 @@
+/* Single-precision vector (Advanced SIMD) asinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "v_log1pf_inline.h"
+
+#define SignMask v_u32 (0x80000000)
+
+const static struct data
+{
+ struct v_log1pf_data log1pf_consts;
+ uint32x4_t big_bound;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound;
+#endif
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */
+#if WANT_SIMD_EXCEPT
+ .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30). */
+#endif
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (asinhf, x, y, special);
+}
+
+/* Single-precision implementation of vector asinh(x), using vector log1p.
+ Worst-case error is 2.66 ULP, at roughly +/-0.25:
+ __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */
+VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (asinh) (float32x4_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+ uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask);
+ float32x4_t ax = vreinterpretq_f32_u32 (iax);
+ uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
+ float32x4_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+ /* Sidestep tiny and large values to avoid inadvertently triggering
+ under/overflow. */
+ special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound));
+ if (__glibc_unlikely (v_any_u32 (special)))
+ {
+ ax = v_zerofy_f32 (ax, special);
+ x = v_zerofy_f32 (x, special);
+ }
+#endif
+
+ /* asinh(x) = log(x + sqrt(x * x + 1)).
+ For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
+ float32x4_t d
+ = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x)));
+ float32x4_t y = log1pf_inline (
+ vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts);
+
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return special_case (special_arg, vbslq_f32 (SignMask, x, y), special);
+ return vbslq_f32 (SignMask, x, y);
+}
+libmvec_hidden_def (V_NAME_F1 (asinh))
+HALF_WIDTH_ALIAS_F1 (asinh)
diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c
new file mode 100644
index 0000000000000000..d85c3a685c0b83ff
--- /dev/null
+++ b/sysdeps/aarch64/fpu/asinhf_sve.c
@@ -0,0 +1,56 @@
+/* Single-precision vector (SVE) asinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "sv_log1pf_inline.h"
+
+#define BigBound (0x5f800000) /* asuint(0x1p64). */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (asinhf, x, y, special);
+}
+
+/* Single-precision SVE asinh(x) routine. Implements the same algorithm as
+ vector asinhf and log1p.
+
+ Maximum error is 2.48 ULPs:
+ SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4
+ want 0x1.ffbbb8p-4. */
+svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
+{
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svbool_t special = svcmpge (pg, iax, BigBound);
+
+ /* asinh(x) = log(x + sqrt(x * x + 1)).
+ For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
+ svfloat32_t ax2 = svmul_x (pg, ax, ax);
+ svfloat32_t d = svadd_x (pg, svsqrt_x (pg, svadd_x (pg, ax2, 1.0f)), 1.0f);
+ svfloat32_t y
+ = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))),
+ special);
+ return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y)));
+}
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 841330956c102ff1..eb2af35b27757fc6 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -41,6 +41,10 @@
# define __DECL_SIMD_asin __DECL_SIMD_aarch64
# undef __DECL_SIMD_asinf
# define __DECL_SIMD_asinf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinh
+# define __DECL_SIMD_asinh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_asinhf
+# define __DECL_SIMD_asinhf __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan
# define __DECL_SIMD_atan __DECL_SIMD_aarch64
# undef __DECL_SIMD_atanf
@@ -131,6 +135,7 @@ __vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
@@ -150,6 +155,7 @@ __vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
@@ -174,6 +180,7 @@ __sv_f32_t _ZGVsMxvv_atan2f (__sv_f32_t, __sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_acosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
@@ -193,6 +200,7 @@ __sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index f4ce1d70096888aa..3d7177c32dcd77a6 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -26,6 +26,7 @@
VPCS_VECTOR_WRAPPER (acos_advsimd, _ZGVnN2v_acos)
VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
+VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 0e973cc9d7ade813..b88a2afe5c1198c0 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -45,6 +45,7 @@
SVE_VECTOR_WRAPPER (acos_sve, _ZGVsMxv_acos)
SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
+SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 0ce026b5ea96a064..533655402d3f3737 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -26,6 +26,7 @@
VPCS_VECTOR_WRAPPER (acosf_advsimd, _ZGVnN4v_acosf)
VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
+VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 398b7373e800cd5b..f7b673e3358e7d82 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -45,6 +45,7 @@
SVE_VECTOR_WRAPPER (acosf_sve, _ZGVsMxv_acosf)
SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
+SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 1646cdbdd22d93d9..b916e422432014c2 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -90,11 +90,19 @@ double: 2
float: 2
ldouble: 4
+Function: "asinh_advsimd":
+double: 1
+float: 2
+
Function: "asinh_downward":
double: 3
float: 3
ldouble: 4
+Function: "asinh_sve":
+double: 1
+float: 2
+
Function: "asinh_towardzero":
double: 2
float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index f5aaa519f2c8663e..f288afdfdd9c8757 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -75,15 +75,20 @@ GLIBC_2.39 _ZGVsMxvv_atan2 F
GLIBC_2.39 _ZGVsMxvv_atan2f F
GLIBC_2.40 _ZGVnN2v_acosh F
GLIBC_2.40 _ZGVnN2v_acoshf F
+GLIBC_2.40 _ZGVnN2v_asinh F
+GLIBC_2.40 _ZGVnN2v_asinhf F
GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
GLIBC_2.40 _ZGVnN4v_acoshf F
+GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
+GLIBC_2.40 _ZGVsMxv_asinh F
+GLIBC_2.40 _ZGVsMxv_asinhf F
GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F

521
glibc-RHEL-118273-40.patch Normal file
View File

@ -0,0 +1,521 @@
commit 1e3d1ddf977ecd653de8d0d10eb083d80ac21cf3
Author: Dylan Fleming <Dylan.Fleming@arm.com>
Date: Wed Jun 18 16:17:12 2025 +0000
AArch64: Optimize SVE exp functions
Improve performance of SVE exps by making better use
of the SVE FEXPA instruction.
Performance improvement on Neoverse V1:
exp2_sve: 21%
exp2f_sve: 24%
exp10f_sve: 23%
expm1_sve: 25%
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index 8aa3fa9c4335cfb8..0a4c26450601a1db 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -19,26 +19,19 @@
#include "sv_math.h"
-/* For x < -Thres, the result is subnormal and not handled correctly by
- FEXPA. */
-#define Thres 37.9
+/* For x < -Thres (-log10(2^126)), the result is subnormal and not handled
+ correctly by FEXPA. */
+#define Thres 0x1.2f702p+5
static const struct data
{
- float log2_10_lo, c0, c2, c4;
- float c1, c3, log10_2;
- float shift, log2_10_hi, thres;
+ float log10_2, log2_10_hi, log2_10_lo, c1;
+ float c0, shift, thres;
} data = {
/* Coefficients generated using Remez algorithm with minimisation of relative
- error.
- rel error: 0x1.89dafa3p-24
- abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
- maxerr: 0.52 +0.5 ulp. */
- .c0 = 0x1.26bb16p+1f,
- .c1 = 0x1.5350d2p+1f,
- .c2 = 0x1.04744ap+1f,
- .c3 = 0x1.2d8176p+0f,
- .c4 = 0x1.12b41ap-1f,
+ error. */
+ .c0 = 0x1.26bb62p1,
+ .c1 = 0x1.53524cp1,
/* 1.5*2^17 + 127, a shift value suitable for FEXPA. */
.shift = 0x1.803f8p17f,
.log10_2 = 0x1.a934fp+1,
@@ -53,28 +46,23 @@ sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
/* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
with poly(r) in [1/sqrt(2), sqrt(2)] and
x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */
-
- svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log2_10_lo);
+ svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->log10_2);
/* n = round(x/(log10(2)/N)). */
svfloat32_t shift = sv_f32 (d->shift);
- svfloat32_t z = svmad_x (pg, sv_f32 (d->log10_2), x, shift);
- svfloat32_t n = svsub_x (svptrue_b32 (), z, shift);
+ svfloat32_t z = svmla_lane (shift, x, lane_consts, 0);
+ svfloat32_t n = svsub_x (pg, z, shift);
/* r = x - n*log10(2)/N. */
- svfloat32_t r = svmsb_x (pg, sv_f32 (d->log2_10_hi), n, x);
- r = svmls_lane (r, n, lane_consts, 0);
+ svfloat32_t r = x;
+ r = svmls_lane (r, n, lane_consts, 1);
+ r = svmls_lane (r, n, lane_consts, 2);
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
/* Polynomial evaluation: poly(r) ~ exp10(r)-1. */
- svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, lane_consts, 2);
- svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, lane_consts, 3);
- svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
- svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
- svfloat32_t p0 = svmul_lane (r, lane_consts, 1);
- svfloat32_t poly = svmla_x (pg, p0, r2, p14);
-
+ svfloat32_t poly = svmla_lane (sv_f32 (d->c0), r, lane_consts, 3);
+ poly = svmul_x (pg, poly, r);
return svmla_x (pg, scale, scale, poly);
}
@@ -85,11 +73,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
special);
}
-/* Single-precision SVE exp10f routine. Implements the same algorithm
- as AdvSIMD exp10f.
- Worst case error is 1.02 ULPs.
- _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
- want 0x1.ba5f9cp-1. */
+/* Single-precision SVE exp10f routine. Based on the FEXPA instruction.
+ Worst case error is 1.10 ULP.
+ _ZGVsMxv_exp10f (0x1.cc76dep+3) got 0x1.be0172p+47
+ want 0x1.be017p+47. */
svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c
index 5dfb77cdbc2f6a51..ed11423e45059133 100644
--- a/sysdeps/aarch64/fpu/exp2_sve.c
+++ b/sysdeps/aarch64/fpu/exp2_sve.c
@@ -19,23 +19,21 @@
#include "sv_math.h"
-#define N (1 << V_EXP_TABLE_BITS)
-
#define BigBound 1022
#define UOFlowBound 1280
static const struct data
{
- double c0, c2;
- double c1, c3;
+ double c2, c4;
+ double c0, c1, c3;
double shift, big_bound, uoflow_bound;
} data = {
/* Coefficients are computed using Remez algorithm with
minimisation of the absolute error. */
- .c0 = 0x1.62e42fefa3686p-1, .c1 = 0x1.ebfbdff82c241p-3,
- .c2 = 0x1.c6b09b16de99ap-5, .c3 = 0x1.3b2abf5571ad8p-7,
- .shift = 0x1.8p52 / N, .uoflow_bound = UOFlowBound,
- .big_bound = BigBound,
+ .c0 = 0x1.62e42fefa39efp-1, .c1 = 0x1.ebfbdff82a31bp-3,
+ .c2 = 0x1.c6b08d706c8a5p-5, .c3 = 0x1.3b2ad2ff7d2f3p-7,
+ .c4 = 0x1.5d8761184beb3p-10, .shift = 0x1.800000000ffc0p+46,
+ .uoflow_bound = UOFlowBound, .big_bound = BigBound,
};
#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
@@ -64,50 +62,52 @@ special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
/* |n| > 1280 => 2^(n) overflows. */
- svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
+ svbool_t p_cmp = svacle (pg, n, d->uoflow_bound);
svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
svfloat64_t r2 = svmla_x (pg, s2, s2, y);
svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
- return svsel (p_cmp, r1, r0);
+ return svsel (p_cmp, r0, r1);
}
/* Fast vector implementation of exp2.
- Maximum measured error is 1.65 ulp.
- _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
- want 0x1.f8db0d4df721dp-1. */
+ Maximum measured error is 0.52 + 0.5 ulp.
+ _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0
+ want 0x1.8861641b49e07p+0. */
svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- svbool_t no_big_scale = svacle (pg, x, d->big_bound);
- svbool_t special = svnot_z (pg, no_big_scale);
-
- /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. */
- svfloat64_t shift = sv_f64 (d->shift);
- svfloat64_t kd = svadd_x (pg, x, shift);
- svuint64_t ki = svreinterpret_u64 (kd);
- /* kd = k/N. */
- kd = svsub_x (pg, kd, shift);
- svfloat64_t r = svsub_x (pg, x, kd);
-
- /* scale ~= 2^(k/N). */
- svuint64_t idx = svand_x (pg, ki, N - 1);
- svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx);
- /* This is only a valid scale when -1023*N < k < 1024*N. */
- svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
- svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
-
- svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1);
- /* Approximate exp2(r) using polynomial. */
- /* y = exp2(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */
+ svbool_t special = svacge (pg, x, d->big_bound);
+
+ svfloat64_t z = svadd_x (svptrue_b64 (), x, d->shift);
+ svfloat64_t n = svsub_x (svptrue_b64 (), z, d->shift);
+ svfloat64_t r = svsub_x (svptrue_b64 (), x, n);
+
+ svfloat64_t scale = svexpa (svreinterpret_u64 (z));
+
svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
- svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), r, c13, 0);
- svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), r, c13, 1);
- svfloat64_t p = svmla_x (pg, p01, p23, r2);
+ svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+ /* Approximate exp2(r) using polynomial. */
+ /* y = exp2(r) - 1 ~= r * (C0 + C1 r + C2 r^2 + C3 r^3 + C4 r^4). */
+ svfloat64_t p12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+ svfloat64_t p34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+ svfloat64_t p = svmla_x (pg, p12, p34, r2);
+ p = svmad_x (pg, p, r, d->c0);
svfloat64_t y = svmul_x (svptrue_b64 (), r, p);
+
/* Assemble exp2(x) = exp2(r) * scale. */
if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (pg, scale, y, kd, d);
+ {
+ /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+ special case function so needs to be copied.
+ e = sign bit of asuint64 (z) << 46. */
+ svuint64_t e = svand_x (pg, svlsl_x (pg, svreinterpret_u64 (z), 46),
+ 0x8000000000000000);
+ scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+ return special_case (pg, scale, y, n, d);
+ }
+
return svmla_x (pg, scale, scale, y);
}
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index c6216bed9e9e7538..cf01820288f1855c 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -18,21 +18,17 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f32.h"
#define Thres 0x1.5d5e2ap+6f
static const struct data
{
- float c0, c2, c4, c1, c3;
- float shift, thres;
+ float c0, c1, shift, thres;
} data = {
- /* Coefficients copied from the polynomial in AdvSIMD variant. */
- .c0 = 0x1.62e422p-1f,
- .c1 = 0x1.ebf9bcp-3f,
- .c2 = 0x1.c6bd32p-5f,
- .c3 = 0x1.3ce9e4p-7f,
- .c4 = 0x1.59977ap-10f,
+ /* Coefficients generated using Remez algorithm with minimisation of relative
+ error. */
+ .c0 = 0x1.62e485p-1,
+ .c1 = 0x1.ebfbe0p-3,
/* 1.5*2^17 + 127. */
.shift = 0x1.803f8p17f,
/* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
@@ -51,16 +47,8 @@ sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
svfloat32_t scale = svexpa (svreinterpret_u32 (z));
- /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
- Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
- coefficients 1 to 4, and apply most significant coefficient directly. */
- svfloat32_t even_coeffs = svld1rq (svptrue_b32 (), &d->c0);
- svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
- svfloat32_t p12 = svmla_lane (sv_f32 (d->c1), r, even_coeffs, 1);
- svfloat32_t p34 = svmla_lane (sv_f32 (d->c3), r, even_coeffs, 2);
- svfloat32_t p14 = svmla_x (pg, p12, r2, p34);
- svfloat32_t p0 = svmul_lane (r, even_coeffs, 0);
- svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+ svfloat32_t poly = svmla_x (pg, sv_f32 (d->c0), r, sv_f32 (d->c1));
+ poly = svmul_x (svptrue_b32 (), poly, r);
return svmla_x (pg, scale, scale, poly);
}
@@ -72,11 +60,10 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
special);
}
-/* Single-precision SVE exp2f routine. Implements the same algorithm
- as AdvSIMD exp2f.
- Worst case error is 1.04 ULPs.
- _ZGVsMxv_exp2f(-0x1.af994ap-3) got 0x1.ba6a66p-1
- want 0x1.ba6a64p-1. */
+/* Single-precision SVE exp2f routine, based on the FEXPA instruction.
+ Worst case error is 1.09 ULPs.
+ _ZGVsMxv_exp2f (0x1.9a2a94p-1) got 0x1.be1054p+0
+ want 0x1.be1052p+0. */
svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c
index c933cf9c0eb2406b..4c35e0341d34aee0 100644
--- a/sysdeps/aarch64/fpu/expm1_sve.c
+++ b/sysdeps/aarch64/fpu/expm1_sve.c
@@ -18,82 +18,164 @@
<https://www.gnu.org/licenses/>. */
#include "sv_math.h"
-#include "poly_sve_f64.h"
-#define SpecialBound 0x1.62b7d369a5aa9p+9
-#define ExponentBias 0x3ff0000000000000
+#define FexpaBound 0x1.4cb5ecef28adap-3 /* 15*ln2/64. */
+#define SpecialBound 0x1.628c2855bfaddp+9 /* ln(2^(1023 + 1/128)). */
static const struct data
{
- double poly[11];
- double shift, inv_ln2, special_bound;
- /* To be loaded in one quad-word. */
+ double c2, c4;
+ double inv_ln2;
double ln2_hi, ln2_lo;
+ double c0, c1, c3;
+ double shift, thres;
+ uint64_t expm1_data[32];
} data = {
- /* Generated using fpminimax. */
- .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
- 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13,
- 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
- 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
-
- .special_bound = SpecialBound,
- .inv_ln2 = 0x1.71547652b82fep0,
- .ln2_hi = 0x1.62e42fefa39efp-1,
- .ln2_lo = 0x1.abc9e3b39803fp-56,
- .shift = 0x1.8p52,
+ /* Table emulating FEXPA - 1, for values of FEXPA close to 1.
+ The table holds values of 2^(i/64) - 1, computed in arbitrary precision.
+ The first half of the table stores values associated to i from 0 to 15.
+ The second half of the table stores values associated to i from 0 to -15. */
+ .expm1_data = {
+ 0x0000000000000000, 0x3f864d1f3bc03077, 0x3f966c34c5615d0f, 0x3fa0e8a30eb37901,
+ 0x3fa6ab0d9f3121ec, 0x3fac7d865a7a3440, 0x3fb1301d0125b50a, 0x3fb429aaea92ddfb,
+ 0x3fb72b83c7d517ae, 0x3fba35beb6fcb754, 0x3fbd4873168b9aa8, 0x3fc031dc431466b2,
+ 0x3fc1c3d373ab11c3, 0x3fc35a2b2f13e6e9, 0x3fc4f4efa8fef709, 0x3fc6942d3720185a,
+ 0x0000000000000000, 0xbfc331751ec3a814, 0xbfc20224341286e4, 0xbfc0cf85bed0f8b7,
+ 0xbfbf332113d56b1f, 0xbfbcc0768d4175a6, 0xbfba46f918837cb7, 0xbfb7c695afc3b424,
+ 0xbfb53f391822dbc7, 0xbfb2b0cfe1266bd4, 0xbfb01b466423250a, 0xbfaafd11874c009e,
+ 0xbfa5b505d5b6f268, 0xbfa05e4119ea5d89, 0xbf95f134923757f3, 0xbf860f9f985bc9f4,
+ },
+
+ /* Generated using Remez, in [-log(2)/128, log(2)/128]. */
+ .c0 = 0x1p-1,
+ .c1 = 0x1.55555555548f9p-3,
+ .c2 = 0x1.5555555554c22p-5,
+ .c3 = 0x1.111123aaa2fb2p-7,
+ .c4 = 0x1.6c16d77d98e5bp-10,
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ .inv_ln2 = 0x1.71547652b82fep+0,
+ .shift = 0x1.800000000ffc0p+46, /* 1.5*2^46+1023. */
+ .thres = SpecialBound,
};
-static svfloat64_t NOINLINE
-special_case (svfloat64_t x, svfloat64_t y, svbool_t pg)
+#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
+/* SpecialBias1 + SpecialBias1 = asuint(1.0). */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
+
+static NOINLINE svfloat64_t
+special_case (svbool_t pg, svfloat64_t y, svfloat64_t s, svfloat64_t p,
+ svfloat64_t n)
{
- return sv_call_f64 (expm1, x, y, pg);
+ /* s=2^n may overflow, break it up into s=s1*s2,
+ such that exp = s + s*y can be computed as s1*(s2+s2*y)
+ and s1*s1 overflows only if n>0. */
+
+ /* If n<=0 then set b to 0x6, 0 otherwise. */
+ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */
+ svuint64_t b
+ = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */
+
+ /* Set s1 to generate overflow depending on sign of exponent n,
+ ie. s1 = 0x70...0 - b. */
+ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+ /* Offset s to avoid overflow in final result if n is below threshold.
+ ie. s2 = as_u64 (s) - 0x3010...0 + b. */
+ svfloat64_t s2 = svreinterpret_f64 (
+ svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
+
+ /* |n| > 1280 => 2^(n) overflows. */
+ svbool_t p_cmp = svacgt (pg, n, 1280.0);
+
+ svfloat64_t r1 = svmul_x (svptrue_b64 (), s1, s1);
+ svfloat64_t r2 = svmla_x (pg, s2, s2, p);
+ svfloat64_t r0 = svmul_x (svptrue_b64 (), r2, s1);
+
+ svbool_t is_safe = svacle (pg, n, 1023); /* Only correct special lanes. */
+ return svsel (is_safe, y, svsub_x (pg, svsel (p_cmp, r1, r0), 1.0));
}
-/* Double-precision vector exp(x) - 1 function.
- The maximum error observed error is 2.18 ULP:
- _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
- want 0x1.a8b9ea8d66e2p-2. */
+/* FEXPA based SVE expm1 algorithm.
+ Maximum measured error is 2.81 + 0.5 ULP:
+ _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3
+ want 0x1.c290e5858bb5p-3. */
svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
- /* Large, Nan/Inf. */
- svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
-
- /* Reduce argument to smaller range:
- Let i = round(x / ln2)
- and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
- exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
- where 2^i is exact because i is an integer. */
- svfloat64_t shift = sv_f64 (d->shift);
- svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift);
- svint64_t i = svcvt_s64_x (pg, n);
- svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
- svfloat64_t f = svmls_lane (x, n, ln2, 0);
- f = svmls_lane (f, n, ln2, 1);
-
- /* Approximate expm1(f) using polynomial.
- Taylor expansion for expm1(x) has the form:
- x + ax^2 + bx^3 + cx^4 ....
- So we calculate the polynomial P(f) = a + bf + cf^2 + ...
- and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
- svfloat64_t f2 = svmul_x (pg, f, f);
- svfloat64_t f4 = svmul_x (pg, f2, f2);
- svfloat64_t f8 = svmul_x (pg, f4, f4);
- svfloat64_t p
- = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
-
- /* Assemble the result.
- expm1(x) ~= 2^i * (p + 1) - 1
- Let t = 2^i. */
- svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias);
- svfloat64_t t = svreinterpret_f64 (u);
-
- /* expm1(x) ~= p * t + (t - 1). */
- svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t);
+ svbool_t special = svacgt (pg, x, d->thres);
- if (__glibc_unlikely (svptest_any (pg, special)))
- return special_case (x, y, special);
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svuint64_t u = svreinterpret_u64 (z);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+ /* r = x - n * ln2, r is in [-ln2/128, ln2/128]. */
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t r = x;
+ r = svmls_lane (r, n, ln2, 0);
+ r = svmls_lane (r, n, ln2, 1);
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
+ svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r);
+ svfloat64_t c24 = svld1rq (svptrue_b64 (), &d->c2);
+
+ svfloat64_t p;
+ svfloat64_t c12 = svmla_lane (sv_f64 (d->c1), r, c24, 0);
+ svfloat64_t c34 = svmla_lane (sv_f64 (d->c3), r, c24, 1);
+ p = svmad_x (pg, c34, r2, c12);
+ p = svmad_x (pg, p, r, sv_f64 (d->c0));
+ p = svmad_x (pg, p, r2, r);
+
+ svfloat64_t scale = svexpa (u);
+ svfloat64_t scalem1 = svsub_x (pg, scale, sv_f64 (1.0));
+
+ /* We want to construct expm1(x) = (scale - 1) + scale * poly.
+ However, for values of scale close to 1, scale-1 causes large ULP errors
+ due to cancellation.
+
+ This can be circumvented by using a small lookup for scale-1
+ when our input is below a certain bound, otherwise we can use FEXPA.
+
+ This bound is based upon the table size:
+ Bound = ((TableSize - 1) / 64) * ln2.
+ The current bound is based upon a table size of 16. */
+ svbool_t is_small = svaclt (pg, x, FexpaBound);
+
+ if (svptest_any (pg, is_small))
+ {
+ /* Index via the input of FEXPA, but we only care about the lower 4 bits.
+ */
+ svuint64_t base_idx = svand_x (pg, u, 0xf);
+
+ /* We can use the sign of x as a fifth bit to account for the asymmetry
+ of e^x around 0. */
+ svuint64_t signBit
+ = svlsl_x (pg, svlsr_x (pg, svreinterpret_u64 (x), 63), 4);
+ svuint64_t idx = svorr_x (pg, base_idx, signBit);
+
+ /* Lookup values for scale - 1 for small x. */
+ svfloat64_t lookup = svreinterpret_f64 (
+ svld1_gather_index (is_small, d->expm1_data, idx));
+
+ /* Select the appropriate scale - 1 value based on x. */
+ scalem1 = svsel (is_small, lookup, scalem1);
+ }
+
+ svfloat64_t y = svmla_x (pg, scalem1, scale, p);
+
+ /* FEXPA returns nan for large inputs so we special case those. */
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ {
+ /* FEXPA zeroes the sign bit, however the sign is meaningful to the
+ special case function so needs to be copied.
+ e = sign bit of u << 46. */
+ svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000);
+ /* Copy sign to s. */
+ scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+ return special_case (pg, y, scale, p, n);
+ }
+
+ /* return expm1 = (scale - 1) + (scale * poly). */
return y;
}

View File

@ -0,0 +1,49 @@
commit aac077645a645bba0d67f3250e82017c539d0f4b
Author: Pierre Blanchard <pierre.blanchard@arm.com>
Date: Wed Aug 20 17:41:50 2025 +0000
AArch64: Fix SVE powf routine [BZ #33299]
Fix a bug in predicate logic introduced in last change.
A slight performance improvement from relying on all true
predicates during conversion from single to double.
This fixes BZ #33299.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c
index 08d7019a1855ff3c..33bba96054cf4cc8 100644
--- a/sysdeps/aarch64/fpu/powf_sve.c
+++ b/sysdeps/aarch64/fpu/powf_sve.c
@@ -223,15 +223,15 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
const svbool_t ptrue = svptrue_b64 ();
/* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
- * in order to perform core computation in double precision. */
+ in order to perform core computation in double precision. */
const svbool_t pg_lo = svunpklo (pg);
const svbool_t pg_hi = svunpkhi (pg);
- svfloat64_t y_lo
- = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
- svfloat64_t y_hi
- = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
- svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
- svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
+ svfloat64_t y_lo = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
+ svfloat64_t y_hi = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
+ svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));
+ svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));
svuint64_t i_lo = svunpklo (i);
svuint64_t i_hi = svunpkhi (i);
svint64_t k_lo = svunpklo (k);
@@ -312,7 +312,7 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
(23 - V_POWF_EXP2_TABLE_BITS));
/* Compute core in extended precision and return intermediate ylogx results
- * to handle cases of underflow and underflow in exp. */
+ to handle cases of underflow and overflow in exp. */
svfloat32_t ylogx;
svfloat32_t ret
= sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);

174
glibc-RHEL-118273-42.patch Normal file
View File

@ -0,0 +1,174 @@
commit e20ca759af46fbb7eae20c52b857e7636eb50e1b
Author: remph <lhr@disroot.org>
Date: Thu Sep 4 12:53:56 2025 +0000
AArch64: add optimised strspn/strcspn
Requires Neon (aka. Advanced SIMD). Looks up 16 characters at a time,
for a 2-3x performance improvement, and a ~30% speedup on the strtok &
strsep benchtests, as tested on Cortex A-{53,72}.
Signed-off-by: remph <lhr@disroot.org>
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/strcspn.S b/sysdeps/aarch64/strcspn.S
new file mode 100644
index 0000000000000000..f2a69e9856cba04c
--- /dev/null
+++ b/sysdeps/aarch64/strcspn.S
@@ -0,0 +1,2 @@
+#define USE_AS_STRCSPN 1
+#include "strspn.S"
diff --git a/sysdeps/aarch64/strspn.S b/sysdeps/aarch64/strspn.S
new file mode 100644
index 0000000000000000..edbb705b15991e39
--- /dev/null
+++ b/sysdeps/aarch64/strspn.S
@@ -0,0 +1,146 @@
+/* Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#ifdef USE_AS_STRCSPN
+# define STRSPN strcspn
+# define SBT orr /* SBT -- `set bit' */
+#else
+# define STRSPN strspn
+# define SBT bic
+#endif
+
+#ifdef __AARCH64EB__
+# define LS_FW lsl
+# define LS_BK lsr
+#else
+# define LS_FW lsr
+# define LS_BK lsl
+#endif
+
+#define og_s x0
+#define set x1 /* ACCEPT for strspn, REJECT for strcspn */
+
+#define byte_i x3
+#define bits_i x4
+#define one x6
+
+#define syndrome x5
+#define s x6
+
+#define vbyte_i v1.16b
+#define vbits_i v2.16b
+#define table v4.16b-v5.16b
+#define table_a v4
+#define table_b v5
+#define sevens v7.16b
+
+ENTRY(STRSPN)
+ ldrb w2, [set]
+ cbz w2, L(early)
+#ifdef USE_AS_STRCSPN
+ ldrb w3, [set, 1]
+ cbz w3, L(early)
+#endif
+
+ /* Table has ones for bytes to reject and zeros for bytes to accept */
+ mov one, 1
+#ifdef USE_AS_STRCSPN
+ stp one, xzr, [sp, -32]!
+ .cfi_def_cfa_offset 32
+ stp xzr, xzr, [sp, 16]
+#else
+ mvni v0.4s, 0
+ stp q0, q0, [sp, -32]!
+ .cfi_def_cfa_offset 32
+#endif
+
+ .p2align 4
+L(fill_table):
+ lsr byte_i, x2, 6 /* x2 / 64 */
+ lsl bits_i, one, x2 /* x2 % 64 implicitly */
+ ldrb w2, [set, 1]!
+ ldr x5, [sp, byte_i, lsl 3]
+ SBT x5, x5, bits_i
+ str x5, [sp, byte_i, lsl 3]
+ cbnz w2, L(fill_table)
+
+ ld1 {table_a.2d-table_b.2d}, [sp], 32
+ .cfi_def_cfa_offset 0
+ ubfiz syndrome, og_s, 2, 4 /* Bottom 4 bits, times 4 to count nibbles */
+ and s, og_s, -16 /* Round S down to 16-byte boundary */
+ movi sevens, 7
+ /* Bias the syndrome to mask off these nibbles */
+ mov x8, -1
+ LS_BK syndrome, x8, syndrome
+ mvn syndrome, syndrome
+
+L(loop):
+ ldr q0, [s], 16
+ ushr vbyte_i, v0.16b, 3
+ bic vbits_i, sevens, v0.16b
+ tbl v0.16b, {table}, vbyte_i
+ /* Bring the relevant bit to the MSB of each byte */
+ sshl v0.16b, v0.16b, vbits_i
+ /* Set every bit of each byte to its MSB */
+ cmlt v0.16b, v0.16b, 0
+ /* Bytes->nibbles */
+ shrn v0.8b, v0.8h, 4
+ fmov x2, d0
+ bic syndrome, x2, syndrome
+ cbz syndrome, L(loop)
+
+#ifndef __AARCH64EB__
+ rbit syndrome, syndrome
+#endif
+ sub s, s, 16
+ clz syndrome, syndrome
+ sub x0, s, og_s
+ add x0, x0, syndrome, lsr 2
+ ret
+
+ .balign 8 /* For strspn, which has only 2 instructions here */
+L(early):
+#ifdef USE_AS_STRCSPN
+ /* strlen(set) < 2: call strchrnul(s, *set) and get its offset from S */
+ stp fp, lr, [sp, -32]!
+ .cfi_def_cfa_offset 32
+ .cfi_offset fp, -32
+ .cfi_offset lr, -24
+ str x19, [sp, 16]
+ .cfi_offset 19, -16
+ mov w1, w2
+ mov fp, sp
+ mov x19, x0
+ bl __strchrnul
+ sub x0, x0, x19
+ ldr x19, [sp, 16]
+ ldp fp, lr, [sp], 32
+ .cfi_restore lr
+ .cfi_restore fp
+ .cfi_restore 19
+ .cfi_def_cfa_offset 0
+#else
+ mov w0, 0
+#endif
+ ret
+END(STRSPN)
+
+#undef set
+libc_hidden_def(STRSPN)

View File

@ -0,0 +1,93 @@
commit aebaeb2c330482171340e966f7f33fac884a27f4
Author: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Thu Sep 18 14:24:47 2025 +0000
AArch64: Update math-vector-fortran.h
Update math-vector-fortran.h with the latest set of math functions
and sort by name.
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
diff --git a/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h b/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h
index 92e15f0d6a758258..161f43d20c51e252 100644
--- a/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h
+++ b/sysdeps/aarch64/fpu/finclude/math-vector-fortran.h
@@ -15,33 +15,74 @@
! You should have received a copy of the GNU Lesser General Public
! License along with the GNU C Library; if not, see
! <https://www.gnu.org/licenses/>.
+
!GCC$ builtin (acos) attributes simd (notinbranch)
!GCC$ builtin (acosf) attributes simd (notinbranch)
+!GCC$ builtin (acosh) attributes simd (notinbranch)
+!GCC$ builtin (acoshf) attributes simd (notinbranch)
+!GCC$ builtin (acospi) attributes simd (notinbranch)
+!GCC$ builtin (acospif) attributes simd (notinbranch)
!GCC$ builtin (asin) attributes simd (notinbranch)
!GCC$ builtin (asinf) attributes simd (notinbranch)
+!GCC$ builtin (asinh) attributes simd (notinbranch)
+!GCC$ builtin (asinhf) attributes simd (notinbranch)
+!GCC$ builtin (asinpi) attributes simd (notinbranch)
+!GCC$ builtin (asinpif) attributes simd (notinbranch)
!GCC$ builtin (atan) attributes simd (notinbranch)
-!GCC$ builtin (atanf) attributes simd (notinbranch)
!GCC$ builtin (atan2) attributes simd (notinbranch)
!GCC$ builtin (atan2f) attributes simd (notinbranch)
+!GCC$ builtin (atan2pi) attributes simd (notinbranch)
+!GCC$ builtin (atan2pif) attributes simd (notinbranch)
+!GCC$ builtin (atanf) attributes simd (notinbranch)
+!GCC$ builtin (atanh) attributes simd (notinbranch)
+!GCC$ builtin (atanhf) attributes simd (notinbranch)
+!GCC$ builtin (atanpi) attributes simd (notinbranch)
+!GCC$ builtin (atanpif) attributes simd (notinbranch)
+!GCC$ builtin (cbrt) attributes simd (notinbranch)
+!GCC$ builtin (cbrtf) attributes simd (notinbranch)
!GCC$ builtin (cos) attributes simd (notinbranch)
!GCC$ builtin (cosf) attributes simd (notinbranch)
+!GCC$ builtin (cosh) attributes simd (notinbranch)
+!GCC$ builtin (coshf) attributes simd (notinbranch)
+!GCC$ builtin (cospi) attributes simd (notinbranch)
+!GCC$ builtin (cospif) attributes simd (notinbranch)
+!GCC$ builtin (erf) attributes simd (notinbranch)
+!GCC$ builtin (erfc) attributes simd (notinbranch)
+!GCC$ builtin (erfcf) attributes simd (notinbranch)
+!GCC$ builtin (erff) attributes simd (notinbranch)
!GCC$ builtin (exp) attributes simd (notinbranch)
-!GCC$ builtin (expf) attributes simd (notinbranch)
!GCC$ builtin (exp10) attributes simd (notinbranch)
!GCC$ builtin (exp10f) attributes simd (notinbranch)
+!GCC$ builtin (exp10m1) attributes simd (notinbranch)
+!GCC$ builtin (exp10m1f) attributes simd (notinbranch)
!GCC$ builtin (exp2) attributes simd (notinbranch)
!GCC$ builtin (exp2f) attributes simd (notinbranch)
+!GCC$ builtin (exp2m1) attributes simd (notinbranch)
+!GCC$ builtin (exp2m1f) attributes simd (notinbranch)
+!GCC$ builtin (expf) attributes simd (notinbranch)
!GCC$ builtin (expm1) attributes simd (notinbranch)
!GCC$ builtin (expm1f) attributes simd (notinbranch)
+!GCC$ builtin (hypot) attributes simd (notinbranch)
+!GCC$ builtin (hypotf) attributes simd (notinbranch)
!GCC$ builtin (log) attributes simd (notinbranch)
-!GCC$ builtin (logf) attributes simd (notinbranch)
!GCC$ builtin (log10) attributes simd (notinbranch)
!GCC$ builtin (log10f) attributes simd (notinbranch)
!GCC$ builtin (log1p) attributes simd (notinbranch)
!GCC$ builtin (log1pf) attributes simd (notinbranch)
!GCC$ builtin (log2) attributes simd (notinbranch)
!GCC$ builtin (log2f) attributes simd (notinbranch)
+!GCC$ builtin (logf) attributes simd (notinbranch)
+!GCC$ builtin (pow) attributes simd (notinbranch)
+!GCC$ builtin (powf) attributes simd (notinbranch)
!GCC$ builtin (sin) attributes simd (notinbranch)
!GCC$ builtin (sinf) attributes simd (notinbranch)
+!GCC$ builtin (sinh) attributes simd (notinbranch)
+!GCC$ builtin (sinhf) attributes simd (notinbranch)
+!GCC$ builtin (sinpi) attributes simd (notinbranch)
+!GCC$ builtin (sinpif) attributes simd (notinbranch)
!GCC$ builtin (tan) attributes simd (notinbranch)
!GCC$ builtin (tanf) attributes simd (notinbranch)
+!GCC$ builtin (tanh) attributes simd (notinbranch)
+!GCC$ builtin (tanhf) attributes simd (notinbranch)
+!GCC$ builtin (tanpi) attributes simd (notinbranch)
+!GCC$ builtin (tanpif) attributes simd (notinbranch)

View File

@ -0,0 +1,97 @@
commit 6c22823da57aa5218f717f569c04c9573c0448c5
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu Nov 6 18:26:54 2025 +0000
AArch64: Fix instability in AdvSIMD tan
Previously presence of special-cases in one lane could affect the
results in other lanes due to unconditional scalar fallback. The old
WANT_SIMD_EXCEPT option (which has never been enabled in libmvec) has
been removed from AOR, making it easier to spot and fix this. 4%
improvement in throughput with GCC 14 on Neoverse V1. This bug is
present as far back as 2.39 (where tan was first introduced).
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c
index d56a102dd17a3463..c6a5a17126674d7d 100644
--- a/sysdeps/aarch64/fpu/tan_advsimd.c
+++ b/sysdeps/aarch64/fpu/tan_advsimd.c
@@ -25,9 +25,7 @@ static const struct data
float64x2_t poly[9];
double half_pi[2];
float64x2_t two_over_pi, shift;
-#if !WANT_SIMD_EXCEPT
float64x2_t range_val;
-#endif
} data = {
/* Coefficients generated using FPMinimax. */
.poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3),
@@ -38,20 +36,17 @@ static const struct data
.half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
.two_over_pi = V2 (0x1.45f306dc9c883p-1),
.shift = V2 (0x1.8p52),
-#if !WANT_SIMD_EXCEPT
.range_val = V2 (0x1p23),
-#endif
};
#define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */
#define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */
-#define Thresh 0x310000000000000 /* RangeVal - TinyBound. */
/* Special cases (fall back to scalar calls). */
static float64x2_t VPCS_ATTR NOINLINE
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t n, float64x2_t d, uint64x2_t special)
{
- return v_call_f64 (tan, x, x, v_u64 (-1));
+ return v_call_f64 (tan, x, vdivq_f64 (n, d), special);
}
/* Vector approximation for double-precision tan.
@@ -65,14 +60,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
very large inputs. Fall back to scalar routine for all lanes if any are
too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
tiny input to avoid underflow. */
-#if WANT_SIMD_EXCEPT
- uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
- /* iax - tiny_bound > range_val - tiny_bound. */
- uint64x2_t special
- = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh));
- if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
-#endif
/* q = nearest integer to 2 * x / pi. */
float64x2_t q
@@ -81,9 +68,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
/* Use q to reduce x to r in [-pi/4, pi/4], by:
r = x - q * pi/2, in extended precision. */
- float64x2_t r = x;
float64x2_t half_pi = vld1q_f64 (dat->half_pi);
- r = vfmsq_laneq_f64 (r, q, half_pi, 0);
+ float64x2_t r = vfmsq_laneq_f64 (x, q, half_pi, 0);
r = vfmsq_laneq_f64 (r, q, half_pi, 1);
/* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
formula. */
@@ -114,12 +100,13 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
-#if !WANT_SIMD_EXCEPT
uint64x2_t special = vcageq_f64 (x, dat->range_val);
+ float64x2_t swap = vbslq_f64 (no_recip, n, vnegq_f64 (d));
+ d = vbslq_f64 (no_recip, d, n);
+ n = swap;
+
if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
-#endif
+ return special_case (x, n, d, special);
- return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)),
- vbslq_f64 (no_recip, d, n));
+ return vdivq_f64 (n, d);
}

View File

@ -0,0 +1,88 @@
commit e45af510bc816e860c8e2e1d4a652b4fe15c4b34
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu Nov 6 18:29:33 2025 +0000
AArch64: Fix instability in AdvSIMD sinh
Previously presence of special-cases in one lane could affect the
results in other lanes due to unconditional scalar fallback. The old
WANT_SIMD_EXCEPT option (which has never been enabled in libmvec) has
been removed from AOR, making it easier to spot and fix
this. No measured change in performance. This patch applies cleanly as
far back as 2.41, however there are conflicts with 2.40 where sinh was
first introduced.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
index 7adf771517de2507..66504cdee84ee77e 100644
--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
@@ -24,36 +24,26 @@ static const struct data
{
struct v_expm1_data d;
uint64x2_t halff;
-#if WANT_SIMD_EXCEPT
- uint64x2_t tiny_bound, thresh;
-#else
float64x2_t large_bound;
-#endif
} data = {
.d = V_EXPM1_DATA,
.halff = V2 (0x3fe0000000000000),
-#if WANT_SIMD_EXCEPT
- /* 2^-26, below which sinh(x) rounds to x. */
- .tiny_bound = V2 (0x3e50000000000000),
- /* asuint(large_bound) - asuint(tiny_bound). */
- .thresh = V2 (0x0230000000000000),
-#else
/* 2^9. expm1 helper overflows for large input. */
.large_bound = V2 (0x1p+9),
-#endif
};
static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t t, float64x2_t halfsign,
+ uint64x2_t special)
{
- return v_call_f64 (sinh, x, x, v_u64 (-1));
+ return v_call_f64 (sinh, x, vmulq_f64 (t, halfsign), special);
}
/* Approximation for vector double-precision sinh(x) using expm1.
sinh(x) = (exp(x) - exp(-x)) / 2.
The greatest observed error is 2.52 ULP:
- _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
- want -0x1.ac2f05bb66fc9p-2. */
+ _ZGVnN2v_sinh(0x1.9f6ff2ab6fb19p-2) got 0x1.aaed83a3153ccp-2
+ want 0x1.aaed83a3153c9p-2. */
float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
@@ -63,21 +53,16 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
float64x2_t halfsign = vreinterpretq_f64_u64 (
vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
-#if WANT_SIMD_EXCEPT
- uint64x2_t special = vcgeq_u64 (
- vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
-#else
uint64x2_t special = vcageq_f64 (x, d->large_bound);
-#endif
-
- /* Fall back to scalar variant for all lanes if any of them are special. */
- if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
/* Up to the point that expm1 overflows, we can use it to calculate sinh
using a slight rearrangement of the definition of sinh. This allows us to
retain acceptable accuracy for very small inputs. */
float64x2_t t = expm1_inline (ax, &d->d);
t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, t, halfsign, special);
+
return vmulq_f64 (t, halfsign);
}

475
glibc-RHEL-118273-5.patch Normal file
View File

@ -0,0 +1,475 @@
commit 8b679205286e7874f0b04187c0bc787632168aa2
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Wed Apr 3 12:13:53 2024 +0100
aarch64/fpu: Add vector variants of atanh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index d474f2969dd05c26..4c878e590681becc 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -3,6 +3,7 @@ libmvec-supported-funcs = acos \
asin \
asinh \
atan \
+ atanh \
atan2 \
cos \
cosh \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 08ea15efaec959fb..092949dc96d55624 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -89,6 +89,11 @@ libmvec {
_ZGVnN4v_asinhf;
_ZGVsMxv_asinh;
_ZGVsMxv_asinhf;
+ _ZGVnN2v_atanh;
+ _ZGVnN2v_atanhf;
+ _ZGVnN4v_atanhf;
+ _ZGVsMxv_atanh;
+ _ZGVsMxv_atanhf;
_ZGVnN2v_cosh;
_ZGVnN2v_coshf;
_ZGVnN4v_coshf;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 1e80721c9f73ba12..afbb01e191b917a4 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -22,6 +22,7 @@ libmvec_hidden_proto (V_NAME_F1(acosh));
libmvec_hidden_proto (V_NAME_F1(asin));
libmvec_hidden_proto (V_NAME_F1(asinh));
libmvec_hidden_proto (V_NAME_F1(atan));
+libmvec_hidden_proto (V_NAME_F1(atanh));
libmvec_hidden_proto (V_NAME_F1(cos));
libmvec_hidden_proto (V_NAME_F1(cosh));
libmvec_hidden_proto (V_NAME_F1(erf));
diff --git a/sysdeps/aarch64/fpu/atanh_advsimd.c b/sysdeps/aarch64/fpu/atanh_advsimd.c
new file mode 100644
index 0000000000000000..3c3d0bd6ad41396d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanh_advsimd.c
@@ -0,0 +1,64 @@
+/* Double-precision vector (Advanced SIMD) atanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
+
+const static struct data
+{
+ struct v_log1p_data log1p_consts;
+ uint64x2_t one, half;
+} data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
+ .one = V2 (0x3ff0000000000000),
+ .half = V2 (0x3fe0000000000000) };
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (atanh, x, y, special);
+}
+
+/* Approximation for vector double-precision atanh(x) using modified log1p.
+ The greatest observed error is 3.31 ULP:
+ _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+ want 0x1.ffd8ff31b501cp-6. */
+VPCS_ATTR
+float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t ia = vreinterpretq_u64_f64 (ax);
+ uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
+ uint64x2_t special = vcgeq_u64 (ia, d->one);
+ float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
+
+#if WANT_SIMD_EXCEPT
+ ax = v_zerofy_f64 (ax, special);
+#endif
+
+ float64x2_t y;
+ y = vaddq_f64 (ax, ax);
+ y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
+ y = log1p_inline (y, &d->log1p_consts);
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, vmulq_f64 (y, halfsign), special);
+ return vmulq_f64 (y, halfsign);
+}
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
new file mode 100644
index 0000000000000000..7a52728d70f6d226
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -0,0 +1,59 @@
+/* Double-precision vector (SVE) atanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 0
+#include "sv_log1p_inline.h"
+
+#define One (0x3ff0000000000000)
+#define Half (0x3fe0000000000000)
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (atanh, x, y, special);
+}
+
+/* SVE approximation for double-precision atanh, based on log1p.
+ The greatest observed error is 2.81 ULP:
+ _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+ want 0x1.ffd8ff31b501cp-6. */
+svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
+{
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t iax = svreinterpret_u64 (ax);
+ svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
+ svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
+
+ /* It is special if iax >= 1. */
+// svbool_t special = svcmpge (pg, iax, One);
+ svbool_t special = svacge (pg, x, 1.0);
+
+ /* Computation is performed based on the following sequence of equality:
+ (1+x)/(1-x) = 1 + 2x/(1-x). */
+ svfloat64_t y;
+ y = svadd_x (pg, ax, ax);
+ y = svdiv_x (pg, y, svsub_x (pg, sv_f64 (1), ax));
+ /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */
+ y = sv_log1p_inline (y, pg);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, halfsign, y), special);
+ return svmul_x (pg, halfsign, y);
+}
diff --git a/sysdeps/aarch64/fpu/atanhf_advsimd.c b/sysdeps/aarch64/fpu/atanhf_advsimd.c
new file mode 100644
index 0000000000000000..ae488f7b54ddce26
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanhf_advsimd.c
@@ -0,0 +1,79 @@
+/* Single-precision vector (Advanced SIMD) atanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "v_log1pf_inline.h"
+
+const static struct data
+{
+ struct v_log1pf_data log1pf_consts;
+ uint32x4_t one;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound;
+#endif
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .one = V4 (0x3f800000),
+#if WANT_SIMD_EXCEPT
+ /* 0x1p-12, below which atanhf(x) rounds to x. */
+ .tiny_bound = V4 (0x39800000),
+#endif
+};
+
+#define AbsMask v_u32 (0x7fffffff)
+#define Half v_u32 (0x3f000000)
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (atanhf, x, y, special);
+}
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+ The maximum error is 3.08 ULP:
+ __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
+ want 0x1.ffcb82p-5. */
+VPCS_ATTR float32x4_t NOINLINE V_NAME_F1 (atanh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t special
+ = vorrq_u32 (vcgeq_u32 (iax, d->one), vcltq_u32 (iax, d->tiny_bound));
+ /* Side-step special cases by setting those lanes to 0, which will trigger no
+ exceptions. These will be fixed up later. */
+ if (__glibc_unlikely (v_any_u32 (special)))
+ ax = v_zerofy_f32 (ax, special);
+#else
+ uint32x4_t special = vcgeq_u32 (iax, d->one);
+#endif
+
+ float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
+ y = log1pf_inline (y, d->log1pf_consts);
+
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return special_case (x, vmulq_f32 (halfsign, y), special);
+ return vmulq_f32 (halfsign, y);
+}
+libmvec_hidden_def (V_NAME_F1 (atanh))
+HALF_WIDTH_ALIAS_F1 (atanh)
diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
new file mode 100644
index 0000000000000000..dae83041ef7157f0
--- /dev/null
+++ b/sysdeps/aarch64/fpu/atanhf_sve.c
@@ -0,0 +1,54 @@
+/* Single-precision vector (SVE) atanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_log1pf_inline.h"
+
+#define One (0x3f800000)
+#define Half (0x3f000000)
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (atanhf, x, y, special);
+}
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+ The maximum error is 2.28 ULP:
+ _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
+ want 0x1.ffbbb6p-5. */
+svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
+{
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, Half));
+ svbool_t special = svcmpge (pg, iax, One);
+
+ /* Computation is performed based on the following sequence of equalities:
+ * (1+x)/(1-x) = 1 + 2x/(1-x). */
+ svfloat32_t y = svadd_x (pg, ax, ax);
+ y = svdiv_x (pg, y, svsub_x (pg, sv_f32 (1), ax));
+ /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */
+ y = sv_log1pf_inline (y, pg);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, halfsign, y), special);
+
+ return svmul_x (pg, halfsign, y);
+}
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index eb2af35b27757fc6..ab7a8f74548854b9 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -49,6 +49,10 @@
# define __DECL_SIMD_atan __DECL_SIMD_aarch64
# undef __DECL_SIMD_atanf
# define __DECL_SIMD_atanf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanh
+# define __DECL_SIMD_atanh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_atanhf
+# define __DECL_SIMD_atanhf __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan2
# define __DECL_SIMD_atan2 __DECL_SIMD_aarch64
# undef __DECL_SIMD_atan2f
@@ -137,6 +141,7 @@ __vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
@@ -157,6 +162,7 @@ __vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
@@ -182,6 +188,7 @@ __sv_f32_t _ZGVsMxv_acoshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_asinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_atanf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
@@ -202,6 +209,7 @@ __sv_f64_t _ZGVsMxv_acosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_asinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_atan (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 3d7177c32dcd77a6..a01aa99c16740631 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acosh_advsimd, _ZGVnN2v_acosh)
VPCS_VECTOR_WRAPPER (asin_advsimd, _ZGVnN2v_asin)
VPCS_VECTOR_WRAPPER (asinh_advsimd, _ZGVnN2v_asinh)
VPCS_VECTOR_WRAPPER (atan_advsimd, _ZGVnN2v_atan)
+VPCS_VECTOR_WRAPPER (atanh_advsimd, _ZGVnN2v_atanh)
VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index b88a2afe5c1198c0..83cb3ad5d0e4d056 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acosh_sve, _ZGVsMxv_acosh)
SVE_VECTOR_WRAPPER (asin_sve, _ZGVsMxv_asin)
SVE_VECTOR_WRAPPER (asinh_sve, _ZGVsMxv_asinh)
SVE_VECTOR_WRAPPER (atan_sve, _ZGVsMxv_atan)
+SVE_VECTOR_WRAPPER (atanh_sve, _ZGVsMxv_atanh)
SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 533655402d3f3737..831d4d755272d616 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -28,6 +28,7 @@ VPCS_VECTOR_WRAPPER (acoshf_advsimd, _ZGVnN4v_acoshf)
VPCS_VECTOR_WRAPPER (asinf_advsimd, _ZGVnN4v_asinf)
VPCS_VECTOR_WRAPPER (asinhf_advsimd, _ZGVnN4v_asinhf)
VPCS_VECTOR_WRAPPER (atanf_advsimd, _ZGVnN4v_atanf)
+VPCS_VECTOR_WRAPPER (atanhf_advsimd, _ZGVnN4v_atanhf)
VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index f7b673e3358e7d82..96fd612c3e76f6dc 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -47,6 +47,7 @@ SVE_VECTOR_WRAPPER (acoshf_sve, _ZGVsMxv_acoshf)
SVE_VECTOR_WRAPPER (asinf_sve, _ZGVsMxv_asinf)
SVE_VECTOR_WRAPPER (asinhf_sve, _ZGVsMxv_asinhf)
SVE_VECTOR_WRAPPER (atanf_sve, _ZGVsMxv_atanf)
+SVE_VECTOR_WRAPPER (atanhf_sve, _ZGVsMxv_atanhf)
SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index b916e422432014c2..7c2e43d3dc5bbc13 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -173,11 +173,19 @@ double: 2
float: 2
ldouble: 4
+Function: "atanh_advsimd":
+double: 1
+float: 1
+
Function: "atanh_downward":
double: 3
float: 3
ldouble: 4
+Function: "atanh_sve":
+double: 2
+float: 1
+
Function: "atanh_towardzero":
double: 2
float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index f288afdfdd9c8757..ce42372a3a276832 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -77,18 +77,23 @@ GLIBC_2.40 _ZGVnN2v_acosh F
GLIBC_2.40 _ZGVnN2v_acoshf F
GLIBC_2.40 _ZGVnN2v_asinh F
GLIBC_2.40 _ZGVnN2v_asinhf F
+GLIBC_2.40 _ZGVnN2v_atanh F
+GLIBC_2.40 _ZGVnN2v_atanhf F
GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
+GLIBC_2.40 _ZGVnN4v_atanhf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_asinh F
GLIBC_2.40 _ZGVsMxv_asinhf F
+GLIBC_2.40 _ZGVsMxv_atanh F
+GLIBC_2.40 _ZGVsMxv_atanhf F
GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F

758
glibc-RHEL-118273-6.patch Normal file
View File

@ -0,0 +1,758 @@
commit eedbbca0bf3adf3c45aff6c4e128bae3a5562675
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Wed Apr 3 12:15:41 2024 +0100
aarch64/fpu: Add vector variants of sinh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 4c878e590681becc..fb5f3a365b27fdf3 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -17,6 +17,7 @@ libmvec-supported-funcs = acos \
log1p \
log2 \
sin \
+ sinh \
tan
float-advsimd-funcs = $(libmvec-supported-funcs)
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 092949dc96d55624..4774b3efeacf59fb 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -104,5 +104,10 @@ libmvec {
_ZGVnN4v_erff;
_ZGVsMxv_erf;
_ZGVsMxv_erff;
+ _ZGVnN2v_sinh;
+ _ZGVnN2v_sinhf;
+ _ZGVnN4v_sinhf;
+ _ZGVsMxv_sinh;
+ _ZGVsMxv_sinhf;
}
}
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index afbb01e191b917a4..7d9445d5c0c0c2a8 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -35,5 +35,6 @@ libmvec_hidden_proto (V_NAME_F1(log1p));
libmvec_hidden_proto (V_NAME_F1(log2));
libmvec_hidden_proto (V_NAME_F1(log));
libmvec_hidden_proto (V_NAME_F1(sin));
+libmvec_hidden_proto (V_NAME_F1(sinh));
libmvec_hidden_proto (V_NAME_F1(tan));
libmvec_hidden_proto (V_NAME_F2(atan2));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index ab7a8f74548854b9..1e9b76cf41916365 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -105,6 +105,10 @@
# define __DECL_SIMD_sin __DECL_SIMD_aarch64
# undef __DECL_SIMD_sinf
# define __DECL_SIMD_sinf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_sinh
+# define __DECL_SIMD_sinh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_sinhf
+# define __DECL_SIMD_sinhf __DECL_SIMD_aarch64
# undef __DECL_SIMD_tan
# define __DECL_SIMD_tan __DECL_SIMD_aarch64
# undef __DECL_SIMD_tanf
@@ -154,6 +158,7 @@ __vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
@@ -175,6 +180,7 @@ __vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
# undef __ADVSIMD_VEC_MATH_SUPPORTED
@@ -201,6 +207,7 @@ __sv_f32_t _ZGVsMxv_log10f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_log1pf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
@@ -222,6 +229,7 @@ __sv_f64_t _ZGVsMxv_log10 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log1p (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t);
# undef __SVE_VEC_MATH_SUPPORTED
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
new file mode 100644
index 0000000000000000..fa3723b10c15eb29
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
@@ -0,0 +1,121 @@
+/* Double-precision vector (Advanced SIMD) sinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+ float64x2_t poly[11];
+ float64x2_t inv_ln2, m_ln2, shift;
+ uint64x2_t halff;
+ int64x2_t onef;
+#if WANT_SIMD_EXCEPT
+ uint64x2_t tiny_bound, thresh;
+#else
+ uint64x2_t large_bound;
+#endif
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
+ V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
+ V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
+ V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep0),
+ .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
+ .shift = V2 (0x1.8p52),
+
+ .halff = V2 (0x3fe0000000000000),
+ .onef = V2 (0x3ff0000000000000),
+#if WANT_SIMD_EXCEPT
+ /* 2^-26, below which sinh(x) rounds to x. */
+ .tiny_bound = V2 (0x3e50000000000000),
+ /* asuint(large_bound) - asuint(tiny_bound). */
+ .thresh = V2 (0x0230000000000000),
+#else
+/* 2^9. expm1 helper overflows for large input. */
+ .large_bound = V2 (0x4080000000000000),
+#endif
+};
+
+static inline float64x2_t
+expm1_inline (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Reduce argument:
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where i = round(x / ln2)
+ and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
+ float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
+ int64x2_t i = vcvtq_s64_f64 (j);
+ float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0);
+ f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1);
+ /* Approximate expm1(f) using polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t f8 = vmulq_f64 (f4, f4);
+ float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
+ /* t = 2^i. */
+ float64x2_t t = vreinterpretq_f64_u64 (
+ vreinterpretq_u64_s64 (vaddq_s64 (vshlq_n_s64 (i, 52), d->onef)));
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+}
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x)
+{
+ return v_call_f64 (sinh, x, x, v_u64 (-1));
+}
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ _ZGVnN2v_sinh (0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t sign
+ = veorq_u64 (vreinterpretq_u64_f64 (x), vreinterpretq_u64_f64 (ax));
+ float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->halff));
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t special = vcgeq_u64 (
+ vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
+#else
+ uint64x2_t special = vcgeq_u64 (vreinterpretq_u64_f64 (ax), d->large_bound);
+#endif
+
+ /* Fall back to scalar variant for all lanes if any of them are special. */
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x);
+
+ /* Up to the point that expm1 overflows, we can use it to calculate sinh
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ float64x2_t t = expm1_inline (ax);
+ t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
+ return vmulq_f64 (t, halfsign);
+}
diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
new file mode 100644
index 0000000000000000..df5f6c8c06e5b173
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sinh_sve.c
@@ -0,0 +1,107 @@
+/* Double-precision vector (SVE) sinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ float64_t poly[11];
+ float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift;
+ uint64_t halff;
+ int64_t onef;
+ uint64_t large_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
+
+ .inv_ln2 = 0x1.71547652b82fep0,
+ .m_ln2_hi = -0x1.62e42fefa39efp-1,
+ .m_ln2_lo = -0x1.abc9e3b39803fp-56,
+ .shift = 0x1.8p52,
+
+ .halff = 0x3fe0000000000000,
+ .onef = 0x3ff0000000000000,
+ /* 2^9. expm1 helper overflows for large input. */
+ .large_bound = 0x4080000000000000,
+};
+
+static inline svfloat64_t
+expm1_inline (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Reduce argument:
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where i = round(x / ln2)
+ and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
+ svfloat64_t j
+ = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
+ svint64_t i = svcvt_s64_x (pg, j);
+ svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi);
+ f = svmla_x (pg, f, j, d->m_ln2_lo);
+ /* Approximate expm1(f) using polynomial. */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t f4 = svmul_x (pg, f2, f2);
+ svfloat64_t f8 = svmul_x (pg, f4, f4);
+ svfloat64_t p
+ = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
+ /* t = 2^i. */
+ svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return svmla_x (pg, svsub_x (pg, t, 1.0), p, t);
+}
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svbool_t pg)
+{
+ return sv_call_f64 (sinh, x, x, pg);
+}
+
+/* Approximation for SVE double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2
+ want 0x1.ab929fc64bd63p-2. */
+svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t sign
+ = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
+ svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff));
+
+ svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound);
+
+ /* Fall back to scalar variant for all lanes if any are special. */
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, pg);
+
+ /* Up to the point that expm1 overflows, we can use it to calculate sinh
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ svfloat64_t t = expm1_inline (ax, pg);
+ t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+ return svmul_x (pg, t, halfsign);
+}
diff --git a/sysdeps/aarch64/fpu/sinhf_advsimd.c b/sysdeps/aarch64/fpu/sinhf_advsimd.c
new file mode 100644
index 0000000000000000..6bb7482dc28795c1
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sinhf_advsimd.c
@@ -0,0 +1,88 @@
+/* Single-precision vector (Advanced SIMD) sinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "v_expm1f_inline.h"
+
+static const struct data
+{
+ struct v_expm1f_data expm1f_consts;
+ uint32x4_t halff;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound, thresh;
+#else
+ uint32x4_t oflow_bound;
+#endif
+} data = {
+ .expm1f_consts = V_EXPM1F_DATA,
+ .halff = V4 (0x3f000000),
+#if WANT_SIMD_EXCEPT
+ /* 0x1.6a09e8p-32, below which expm1f underflows. */
+ .tiny_bound = V4 (0x2fb504f4),
+ /* asuint(oflow_bound) - asuint(tiny_bound). */
+ .thresh = V4 (0x12fbbbb3),
+#else
+ /* 0x1.61814ep+6, above which expm1f helper overflows. */
+ .oflow_bound = V4 (0x42b0c0a7),
+#endif
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (sinhf, x, y, special);
+}
+
+/* Approximation for vector single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ _ZGVnN4v_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4
+ want 0x1.e469e4p-4. */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sinh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t sign = veorq_u32 (ix, iax);
+ float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff));
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh);
+ ax = v_zerofy_f32 (ax, special);
+#else
+ uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound);
+#endif
+
+ /* Up to the point that expm1f overflows, we can use it to calculate sinhf
+ using a slight rearrangement of the definition of sinh. This allows us
+ to retain acceptable accuracy for very small inputs. */
+ float32x4_t t = expm1f_inline (ax, &d->expm1f_consts);
+ t = vaddq_f32 (t, vdivq_f32 (t, vaddq_f32 (t, v_f32 (1.0))));
+
+ /* Fall back to the scalar variant for any lanes that should trigger an
+ exception. */
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return special_case (x, vmulq_f32 (t, halfsign), special);
+
+ return vmulq_f32 (t, halfsign);
+}
+libmvec_hidden_def (V_NAME_F1 (sinh))
+HALF_WIDTH_ALIAS_F1 (sinh)
diff --git a/sysdeps/aarch64/fpu/sinhf_sve.c b/sysdeps/aarch64/fpu/sinhf_sve.c
new file mode 100644
index 0000000000000000..6c204b57a2aa18d3
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sinhf_sve.c
@@ -0,0 +1,67 @@
+/* Single-precision vector (SVE) sinh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_expm1f_inline.h"
+#include "sv_math.h"
+
+static const struct data
+{
+ struct sv_expm1f_data expm1f_consts;
+ uint32_t halff, large_bound;
+} data = {
+ .expm1f_consts = SV_EXPM1F_DATA,
+ .halff = 0x3f000000,
+ /* 0x1.61814ep+6, above which expm1f helper overflows. */
+ .large_bound = 0x42b0c0a7,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+{
+ return sv_call_f32 (sinhf, x, y, pg);
+}
+
+/* Approximation for SVE single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ _ZGVsMxv_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4
+ want 0x1.e469e4p-4. */
+svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t sign
+ = sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax));
+ svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, d->halff));
+
+ svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->large_bound);
+
+ /* Up to the point that expm1f overflows, we can use it to calculate sinhf
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ svfloat32_t t = expm1f_inline (ax, pg, &d->expm1f_consts);
+ t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+
+ /* Fall back to the scalar variant for any lanes which would cause
+ expm1f to overflow. */
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, t, halfsign), special);
+
+ return svmul_x (pg, t, halfsign);
+}
diff --git a/sysdeps/aarch64/fpu/sv_expm1f_inline.h b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
new file mode 100644
index 0000000000000000..5b7245122294e1b4
--- /dev/null
+++ b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
@@ -0,0 +1,84 @@
+/* Single-precision inline helper for vector (SVE) expm1 function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_SV_EXPM1F_INLINE_H
+#define AARCH64_FPU_SV_EXPM1F_INLINE_H
+
+#include "sv_math.h"
+
+struct sv_expm1f_data
+{
+ /* These 4 are grouped together so they can be loaded as one quadword, then
+ used with _lane forms of svmla/svmls. */
+ float32_t c2, c4, ln2_hi, ln2_lo;
+ float32_t c0, c1, c3, inv_ln2, shift;
+};
+
+/* Coefficients generated using fpminimax. */
+#define SV_EXPM1F_DATA \
+ { \
+ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \
+ .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \
+ \
+ .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
+ .ln2_lo = 0x1.7f7d1cp-20f, \
+ }
+
+#define C(i) sv_f32 (d->c##i)
+
+static inline svfloat32_t
+expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
+{
+ /* This vector is reliant on layout of data - it contains constants
+ that can be used with _lane forms of svmla/svmls. Values are:
+ [ coeff_2, coeff_4, ln2_hi, ln2_lo ]. */
+ svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2);
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
+ j = svsub_x (pg, j, d->shift);
+ svint32_t i = svcvt_s32_x (pg, j);
+
+ svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
+ f = svmls_lane (f, j, lane_constants, 3);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
+ svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
+ svfloat32_t f2 = svmul_x (pg, f, f);
+ svfloat32_t p = svmla_x (pg, p12, f2, p34);
+ p = svmla_x (pg, C (0), f, p);
+ p = svmla_x (pg, f, f2, p);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ svfloat32_t t = svscale_x (pg, sv_f32 (1), i);
+ return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+}
+
+#endif
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index a01aa99c16740631..1a57b22c3a92f1e1 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -42,4 +42,5 @@ VPCS_VECTOR_WRAPPER (log10_advsimd, _ZGVnN2v_log10)
VPCS_VECTOR_WRAPPER (log1p_advsimd, _ZGVnN2v_log1p)
VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2)
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
+VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
VPCS_VECTOR_WRAPPER (tan_advsimd, _ZGVnN2v_tan)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 83cb3ad5d0e4d056..0c9858f6b74aaef6 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -61,4 +61,5 @@ SVE_VECTOR_WRAPPER (log10_sve, _ZGVsMxv_log10)
SVE_VECTOR_WRAPPER (log1p_sve, _ZGVsMxv_log1p)
SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2)
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
+SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
SVE_VECTOR_WRAPPER (tan_sve, _ZGVsMxv_tan)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 831d4d755272d616..4758490c6fc40fda 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -42,4 +42,5 @@ VPCS_VECTOR_WRAPPER (log10f_advsimd, _ZGVnN4v_log10f)
VPCS_VECTOR_WRAPPER (log1pf_advsimd, _ZGVnN4v_log1pf)
VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f)
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
+VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
VPCS_VECTOR_WRAPPER (tanf_advsimd, _ZGVnN4v_tanf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 96fd612c3e76f6dc..7c04f07bbee84777 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -61,4 +61,5 @@ SVE_VECTOR_WRAPPER (log10f_sve, _ZGVsMxv_log10f)
SVE_VECTOR_WRAPPER (log1pf_sve, _ZGVsMxv_log1pf)
SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f)
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
+SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
SVE_VECTOR_WRAPPER (tanf_sve, _ZGVsMxv_tanf)
diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h
new file mode 100644
index 0000000000000000..337ccfbfab555c97
--- /dev/null
+++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h
@@ -0,0 +1,73 @@
+/* Single-precision inline helper for vector (Advanced SIMD) expm1 function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef AARCH64_FPU_V_EXPM1F_INLINE_H
+#define AARCH64_FPU_V_EXPM1F_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+struct v_expm1f_data
+{
+ float32x4_t poly[5];
+ float32x4_t invln2_and_ln2, shift;
+ int32x4_t exponent_bias;
+};
+
+/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
+ log(2)/2]. Exponent bias is asuint(1.0f).
+ invln2_and_ln2 stores constants: invln2, ln2_hi, ln2_lo, 0. */
+#define V_EXPM1F_DATA \
+ { \
+ .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \
+ V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \
+ .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \
+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ }
+
+static inline float32x4_t
+expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
+{
+ /* Helper routine for calculating exp(x) - 1.
+ Copied from v_expm1f_1u6.c, with all special-case handling removed - the
+ calling routine should handle special values if required. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ float32x4_t j = vsubq_f32 (
+ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
+ int32x4_t i = vcvtq_s32_f32 (j);
+ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
+ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
+
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+ Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses
+ Horner. */
+ float32x4_t f2 = vmulq_f32 (f, f);
+ float32x4_t f4 = vmulq_f32 (f2, f2);
+ float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly);
+ p = vfmaq_f32 (f, f2, p);
+
+ /* t = 2^i. */
+ int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
+ float32x4_t t = vreinterpretq_f32_s32 (u);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+}
+
+#endif
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index 7c2e43d3dc5bbc13..fec0972081af734a 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1441,11 +1441,19 @@ double: 2
float: 2
ldouble: 2
+Function: "sinh_advsimd":
+double: 2
+float: 1
+
Function: "sinh_downward":
double: 3
float: 3
ldouble: 3
+Function: "sinh_sve":
+double: 2
+float: 1
+
Function: "sinh_towardzero":
double: 3
float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index ce42372a3a276832..1db5ba61d64067a2 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -83,11 +83,14 @@ GLIBC_2.40 _ZGVnN2v_cosh F
GLIBC_2.40 _ZGVnN2v_coshf F
GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
+GLIBC_2.40 _ZGVnN2v_sinh F
+GLIBC_2.40 _ZGVnN2v_sinhf F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_atanhf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
+GLIBC_2.40 _ZGVnN4v_sinhf F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_asinh F
@@ -98,3 +101,5 @@ GLIBC_2.40 _ZGVsMxv_cosh F
GLIBC_2.40 _ZGVsMxv_coshf F
GLIBC_2.40 _ZGVsMxv_erf F
GLIBC_2.40 _ZGVsMxv_erff F
+GLIBC_2.40 _ZGVsMxv_sinh F
+GLIBC_2.40 _ZGVsMxv_sinhf F

624
glibc-RHEL-118273-7.patch Normal file
View File

@ -0,0 +1,624 @@
commit 3d3a4fb8e4fe854a0bbb3df9c26ba482c10a7e22
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Tue Feb 20 16:59:44 2024 +0000
aarch64/fpu: Add vector variants of tanh
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 5a690023e9a675cb..4584c5e498ab7194 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -7747,7 +7747,7 @@ tan min_subnorm
tan -min_subnorm
tanh 0
-tanh -0
+tanh -0 no-mathvec
tanh 0.75
tanh -0.75
tanh 1.0
diff --git a/math/auto-libm-test-out-tanh b/math/auto-libm-test-out-tanh
index 8b9427c917f3b388..19ce2e7b9355963d 100644
--- a/math/auto-libm-test-out-tanh
+++ b/math/auto-libm-test-out-tanh
@@ -23,31 +23,31 @@ tanh 0
= tanh tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok
= tanh towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok
= tanh upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok
-tanh -0
-= tanh downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok
-= tanh upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok
+tanh -0 no-mathvec
+= tanh downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
+= tanh upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok
tanh 0.75
= tanh downward binary32 0xcp-4 : 0xa.2991fp-4 : inexact-ok
= tanh tonearest binary32 0xcp-4 : 0xa.2991fp-4 : inexact-ok
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index fb5f3a365b27fdf3..e5f418ae4274edb2 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -18,7 +18,8 @@ libmvec-supported-funcs = acos \
log2 \
sin \
sinh \
- tan
+ tan \
+ tanh
float-advsimd-funcs = $(libmvec-supported-funcs)
double-advsimd-funcs = $(libmvec-supported-funcs)
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions
index 4774b3efeacf59fb..4dbf3d32441dd43a 100644
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -109,5 +109,10 @@ libmvec {
_ZGVnN4v_sinhf;
_ZGVsMxv_sinh;
_ZGVsMxv_sinhf;
+ _ZGVnN2v_tanh;
+ _ZGVnN2v_tanhf;
+ _ZGVnN4v_tanhf;
+ _ZGVsMxv_tanh;
+ _ZGVsMxv_tanhf;
}
}
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
index 7d9445d5c0c0c2a8..4ff191c324050b42 100644
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -37,4 +37,5 @@ libmvec_hidden_proto (V_NAME_F1(log));
libmvec_hidden_proto (V_NAME_F1(sin));
libmvec_hidden_proto (V_NAME_F1(sinh));
libmvec_hidden_proto (V_NAME_F1(tan));
+libmvec_hidden_proto (V_NAME_F1(tanh));
libmvec_hidden_proto (V_NAME_F2(atan2));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h
index 1e9b76cf41916365..585e022082d62a5d 100644
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -113,6 +113,10 @@
# define __DECL_SIMD_tan __DECL_SIMD_aarch64
# undef __DECL_SIMD_tanf
# define __DECL_SIMD_tanf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_tanh
+# define __DECL_SIMD_tanh __DECL_SIMD_aarch64
+# undef __DECL_SIMD_tanhf
+# define __DECL_SIMD_tanhf __DECL_SIMD_aarch64
#endif
#if __GNUC_PREREQ(9, 0)
@@ -160,6 +164,7 @@ __vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
@@ -182,6 +187,7 @@ __vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
# undef __ADVSIMD_VEC_MATH_SUPPORTED
#endif /* __ADVSIMD_VEC_MATH_SUPPORTED */
@@ -209,6 +215,7 @@ __sv_f32_t _ZGVsMxv_log2f (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_sinhf (__sv_f32_t, __sv_bool_t);
__sv_f32_t _ZGVsMxv_tanf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_tanhf (__sv_f32_t, __sv_bool_t);
__sv_f64_t _ZGVsMxvv_atan2 (__sv_f64_t, __sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_acos (__sv_f64_t, __sv_bool_t);
@@ -231,6 +238,7 @@ __sv_f64_t _ZGVsMxv_log2 (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sin (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_sinh (__sv_f64_t, __sv_bool_t);
__sv_f64_t _ZGVsMxv_tan (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_tanh (__sv_f64_t, __sv_bool_t);
# undef __SVE_VEC_MATH_SUPPORTED
#endif /* __SVE_VEC_MATH_SUPPORTED */
diff --git a/sysdeps/aarch64/fpu/tanh_advsimd.c b/sysdeps/aarch64/fpu/tanh_advsimd.c
new file mode 100644
index 0000000000000000..1da1dfa5dbe418b6
--- /dev/null
+++ b/sysdeps/aarch64/fpu/tanh_advsimd.c
@@ -0,0 +1,109 @@
+/* Double-precision vector (Advanced SIMD) tanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+ float64x2_t poly[11];
+ float64x2_t inv_ln2, ln2_hi, ln2_lo, shift;
+ uint64x2_t onef;
+ uint64x2_t thresh, tiny_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
+ V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
+ V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
+ V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep0),
+ .ln2_hi = V2 (-0x1.62e42fefa39efp-1),
+ .ln2_lo = V2 (-0x1.abc9e3b39803fp-56),
+ .shift = V2 (0x1.8p52),
+
+ .onef = V2 (0x3ff0000000000000),
+ .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27). */
+ /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
+ .thresh = V2 (0x01f241bf835f9d5f),
+};
+
+static inline float64x2_t
+expm1_inline (float64x2_t x, const struct data *d)
+{
+ /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
+ the scalar variant of tanh. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
+ int64x2_t i = vcvtq_s64_f64 (j);
+ float64x2_t f = vfmaq_f64 (x, j, d->ln2_hi);
+ f = vfmaq_f64 (f, j, d->ln2_lo);
+
+ /* Approximate expm1(f) using polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t p = vfmaq_f64 (
+ f, f2, v_estrin_10_f64 (f, f2, f4, vmulq_f64 (f4, f4), d->poly));
+
+ /* t = 2 ^ i. */
+ float64x2_t t = vreinterpretq_f64_u64 (
+ vaddq_u64 (vreinterpretq_u64_s64 (i << 52), d->onef));
+ /* expm1(x) = p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1)), p, t);
+}
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (tanh, x, y, special);
+}
+
+/* Vector approximation for double-precision tanh(x), using a simplified
+ version of expm1. The greatest observed error is 2.77 ULP:
+ _ZGVnN2v_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
+ want -0x1.bd6a21a163624p-3. */
+float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+ float64x2_t u = x;
+
+ /* Trigger special-cases for tiny, boring and infinity/NaN. */
+ uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh);
+#if WANT_SIMD_EXCEPT
+ /* To trigger fp exceptions correctly, set special lanes to a neutral value.
+ They will be fixed up later by the special-case handler. */
+ if (__glibc_unlikely (v_any_u64 (special)))
+ u = v_zerofy_f64 (u, special);
+#endif
+
+ u = vaddq_f64 (u, u);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ float64x2_t q = expm1_inline (u, d);
+ float64x2_t qp2 = vaddq_f64 (q, v_f64 (2));
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, vdivq_f64 (q, qp2), special);
+ return vdivq_f64 (q, qp2);
+}
diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
new file mode 100644
index 0000000000000000..d25e011cea305094
--- /dev/null
+++ b/sysdeps/aarch64/fpu/tanh_sve.c
@@ -0,0 +1,100 @@
+/* Double-precision vector (SVE) tanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ float64_t poly[11];
+ float64_t inv_ln2, ln2_hi, ln2_lo, shift;
+ uint64_t thresh, tiny_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
+
+ .inv_ln2 = 0x1.71547652b82fep0,
+ .ln2_hi = -0x1.62e42fefa39efp-1,
+ .ln2_lo = -0x1.abc9e3b39803fp-56,
+ .shift = 0x1.8p52,
+
+ .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */
+ /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
+ .thresh = 0x01f241bf835f9d5f,
+};
+
+static inline svfloat64_t
+expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+{
+ /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
+ the scalar variant of tanh. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ svfloat64_t j
+ = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
+ svint64_t i = svcvt_s64_x (pg, j);
+ svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi);
+ f = svmla_x (pg, f, j, d->ln2_lo);
+
+ /* Approximate expm1(f) using polynomial. */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t f4 = svmul_x (pg, f2, f2);
+ svfloat64_t p = svmla_x (
+ pg, f, f2,
+ sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly));
+
+ /* t = 2 ^ i. */
+ svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
+ /* expm1(x) = p * t + (t - 1). */
+ return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+}
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (tanh, x, y, special);
+}
+
+/* SVE approximation for double-precision tanh(x), using a simplified
+ version of expm1. The greatest observed error is 2.77 ULP:
+ _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
+ want -0x1.bd6a21a163624p-3. */
+svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x));
+
+ /* Trigger special-cases for tiny, boring and infinity/NaN. */
+ svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh);
+
+ svfloat64_t u = svadd_x (pg, x, x);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ svfloat64_t q = expm1_inline (u, pg, d);
+ svfloat64_t qp2 = svadd_x (pg, q, 2);
+
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svdiv_x (pg, q, qp2), special);
+ return svdiv_x (pg, q, qp2);
+}
diff --git a/sysdeps/aarch64/fpu/tanhf_advsimd.c b/sysdeps/aarch64/fpu/tanhf_advsimd.c
new file mode 100644
index 0000000000000000..50defd6ef03926f4
--- /dev/null
+++ b/sysdeps/aarch64/fpu/tanhf_advsimd.c
@@ -0,0 +1,76 @@
+/* Single-precision vector (Advanced SIMD) tanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "v_expm1f_inline.h"
+
+static const struct data
+{
+ struct v_expm1f_data expm1f_consts;
+ uint32x4_t boring_bound, large_bound, onef;
+} data = {
+ .expm1f_consts = V_EXPM1F_DATA,
+ /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
+ .boring_bound = V4 (0x41102cb3),
+ .large_bound = V4 (0x7f800000),
+ .onef = V4 (0x3f800000),
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (tanhf, x, y, special);
+}
+
+/* Approximation for single-precision vector tanh(x), using a simplified
+ version of expm1f. The maximum error is 2.58 ULP:
+ _ZGVnN4v_tanhf (0x1.fa5eep-5) got 0x1.f9ba02p-5
+ want 0x1.f9ba08p-5. */
+float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tanh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t sign = veorq_u32 (ix, iax);
+ uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound);
+ float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef));
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered properly, set all special and boring
+ lanes to 0, which will trigger no exceptions, and fix them up later. */
+ uint32x4_t special = vorrq_u32 (vcgtq_u32 (iax, d->large_bound),
+ vcltq_u32 (iax, v_u32 (0x34000000)));
+ x = v_zerofy_f32 (x, is_boring);
+ if (__glibc_unlikely (v_any_u32 (special)))
+ x = v_zerofy_f32 (x, special);
+#else
+ uint32x4_t special = vcgtq_u32 (iax, d->large_bound);
+#endif
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts);
+ float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
+ if (__glibc_unlikely (v_any_u32 (special)))
+ return special_case (vreinterpretq_f32_u32 (ix),
+ vbslq_f32 (is_boring, boring, y), special);
+ return vbslq_f32 (is_boring, boring, y);
+}
+libmvec_hidden_def (V_NAME_F1 (tanh))
+HALF_WIDTH_ALIAS_F1 (tanh)
diff --git a/sysdeps/aarch64/fpu/tanhf_sve.c b/sysdeps/aarch64/fpu/tanhf_sve.c
new file mode 100644
index 0000000000000000..0b94523cf5074200
--- /dev/null
+++ b/sysdeps/aarch64/fpu/tanhf_sve.c
@@ -0,0 +1,61 @@
+/* Single-precision vector (SVE) tanh function
+
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "sv_expm1f_inline.h"
+
+static const struct data
+{
+ struct sv_expm1f_data expm1f_consts;
+ uint32_t boring_bound, onef;
+} data = {
+ .expm1f_consts = SV_EXPM1F_DATA,
+ /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
+ .boring_bound = 0x41102cb3,
+ .onef = 0x3f800000,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (tanhf, x, y, special);
+}
+
+/* Approximation for single-precision SVE tanh(x), using a simplified
+ version of expm1f. The maximum error is 2.57 ULP:
+ _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
+ want 0x1.fb71aap-5. */
+svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound);
+ svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef));
+
+ svbool_t special = svcmpgt (pg, iax, 0x7f800000);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts);
+ svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
+ if (__glibc_unlikely (svptest_any (pg, special)))
+ return special_case (x, svsel_f32 (is_boring, boring, y), special);
+ return svsel_f32 (is_boring, boring, y);
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
index 1a57b22c3a92f1e1..7aeda880bd885ce5 100644
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -44,3 +44,4 @@ VPCS_VECTOR_WRAPPER (log2_advsimd, _ZGVnN2v_log2)
VPCS_VECTOR_WRAPPER (sin_advsimd, _ZGVnN2v_sin)
VPCS_VECTOR_WRAPPER (sinh_advsimd, _ZGVnN2v_sinh)
VPCS_VECTOR_WRAPPER (tan_advsimd, _ZGVnN2v_tan)
+VPCS_VECTOR_WRAPPER (tanh_advsimd, _ZGVnN2v_tanh)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 0c9858f6b74aaef6..95f1ec52221ba626 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -63,3 +63,4 @@ SVE_VECTOR_WRAPPER (log2_sve, _ZGVsMxv_log2)
SVE_VECTOR_WRAPPER (sin_sve, _ZGVsMxv_sin)
SVE_VECTOR_WRAPPER (sinh_sve, _ZGVsMxv_sinh)
SVE_VECTOR_WRAPPER (tan_sve, _ZGVsMxv_tan)
+SVE_VECTOR_WRAPPER (tanh_sve, _ZGVsMxv_tanh)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
index 4758490c6fc40fda..bd6800e91c64136f 100644
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -44,3 +44,4 @@ VPCS_VECTOR_WRAPPER (log2f_advsimd, _ZGVnN4v_log2f)
VPCS_VECTOR_WRAPPER (sinf_advsimd, _ZGVnN4v_sinf)
VPCS_VECTOR_WRAPPER (sinhf_advsimd, _ZGVnN4v_sinhf)
VPCS_VECTOR_WRAPPER (tanf_advsimd, _ZGVnN4v_tanf)
+VPCS_VECTOR_WRAPPER (tanhf_advsimd, _ZGVnN4v_tanhf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 7c04f07bbee84777..35ca305fddb7366c 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -63,3 +63,4 @@ SVE_VECTOR_WRAPPER (log2f_sve, _ZGVsMxv_log2f)
SVE_VECTOR_WRAPPER (sinf_sve, _ZGVsMxv_sinf)
SVE_VECTOR_WRAPPER (sinhf_sve, _ZGVsMxv_sinhf)
SVE_VECTOR_WRAPPER (tanf_sve, _ZGVsMxv_tanf)
+SVE_VECTOR_WRAPPER (tanhf_sve, _ZGVsMxv_tanhf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
index fec0972081af734a..8398b7bc7749808d 100644
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -1496,11 +1496,19 @@ double: 2
float: 2
ldouble: 2
+Function: "tanh_advsimd":
+double: 2
+float: 2
+
Function: "tanh_downward":
double: 3
float: 3
ldouble: 4
+Function: "tanh_sve":
+double: 2
+float: 2
+
Function: "tanh_towardzero":
double: 2
float: 2
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
index 1db5ba61d64067a2..396082f6a7981686 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -85,12 +85,15 @@ GLIBC_2.40 _ZGVnN2v_erf F
GLIBC_2.40 _ZGVnN2v_erff F
GLIBC_2.40 _ZGVnN2v_sinh F
GLIBC_2.40 _ZGVnN2v_sinhf F
+GLIBC_2.40 _ZGVnN2v_tanh F
+GLIBC_2.40 _ZGVnN2v_tanhf F
GLIBC_2.40 _ZGVnN4v_acoshf F
GLIBC_2.40 _ZGVnN4v_asinhf F
GLIBC_2.40 _ZGVnN4v_atanhf F
GLIBC_2.40 _ZGVnN4v_coshf F
GLIBC_2.40 _ZGVnN4v_erff F
GLIBC_2.40 _ZGVnN4v_sinhf F
+GLIBC_2.40 _ZGVnN4v_tanhf F
GLIBC_2.40 _ZGVsMxv_acosh F
GLIBC_2.40 _ZGVsMxv_acoshf F
GLIBC_2.40 _ZGVsMxv_asinh F
@@ -103,3 +106,5 @@ GLIBC_2.40 _ZGVsMxv_erf F
GLIBC_2.40 _ZGVsMxv_erff F
GLIBC_2.40 _ZGVsMxv_sinh F
GLIBC_2.40 _ZGVsMxv_sinhf F
+GLIBC_2.40 _ZGVsMxv_tanh F
+GLIBC_2.40 _ZGVsMxv_tanhf F

5115
glibc-RHEL-118273-8.patch Normal file

File diff suppressed because it is too large Load Diff

348
glibc-RHEL-118273-9.patch Normal file
View File

@ -0,0 +1,348 @@
commit 90a6ca8b28bf34e361e577e526e1b0f4c39a32a5
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu May 2 16:43:13 2024 +0100
aarch64: Fix AdvSIMD libmvec routines for big-endian
Previously many routines used * to load from vector types stored
in the data table. This is emitted as ldr, which byte-swaps the
entire vector register, and causes bugs for big-endian when not
all lanes contain the same value. When a vector is to be used
this way, it has been replaced with an array and the load with an
explicit ld1 intrinsic, which byte-swaps only within lanes.
As well, many routines previously used non-standard GCC syntax
for vector operations such as indexing into vectors types with []
and assembling vectors using {}. This syntax should not be mixed
with ACLE, as the former does not respect endianness whereas the
latter does. Such examples have been replaced with, for instance,
vcombine_* and vgetq_lane* intrinsics. Helpers which only use the
GCC syntax, such as the v_call helpers, do not need changing as
they do not use intrinsics.
Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Conflicts:
sysdeps/aarch64/fpu/exp10f_advsimd.c
sysdeps/aarch64/fpu/expm1_advsimd.c
sysdeps/aarch64/fpu/expm1f_advsimd.c
sysdeps/aarch64/fpu/log10_advsimd.c
sysdeps/aarch64/fpu/log2_advsimd.c
sysdeps/aarch64/fpu/log_advsimd.c
sysdeps/aarch64/fpu/tan_advsimd.c
sysdeps/aarch64/fpu/tanf_advsimd.c
(Already backported by glibc-upstream-2.39-151.patch)
diff --git a/sysdeps/aarch64/fpu/asinh_advsimd.c b/sysdeps/aarch64/fpu/asinh_advsimd.c
index 544a52f6515d3201..6207e7da9531f48d 100644
--- a/sysdeps/aarch64/fpu/asinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/asinh_advsimd.c
@@ -22,6 +22,7 @@
#define A(i) v_f64 (__v_log_data.poly[i])
#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
const static struct data
{
@@ -63,11 +64,15 @@ struct entry
static inline struct entry
lookup (uint64x2_t i)
{
- float64x2_t e0 = vld1q_f64 (
- &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
- float64x2_t e1 = vld1q_f64 (
- &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
- return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) };
+ /* Since N is a power of 2, n % N = n & (N - 1). */
+ struct entry e;
+ uint64_t i0 = (vgetq_lane_u64 (i, 0) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+ uint64_t i1 = (vgetq_lane_u64 (i, 1) >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+ float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+ float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+ e.invc = vuzp1q_f64 (e0, e1);
+ e.logc = vuzp2q_f64 (e0, e1);
+ return e;
}
static inline float64x2_t
diff --git a/sysdeps/aarch64/fpu/cosh_advsimd.c b/sysdeps/aarch64/fpu/cosh_advsimd.c
index ec7b59637e973da9..4bee734f00bd6a9b 100644
--- a/sysdeps/aarch64/fpu/cosh_advsimd.c
+++ b/sysdeps/aarch64/fpu/cosh_advsimd.c
@@ -22,7 +22,9 @@
static const struct data
{
float64x2_t poly[3];
- float64x2_t inv_ln2, ln2, shift, thres;
+ float64x2_t inv_ln2;
+ double ln2[2];
+ float64x2_t shift, thres;
uint64x2_t index_mask, special_bound;
} data = {
.poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
@@ -58,8 +60,9 @@ exp_inline (float64x2_t x)
float64x2_t n = vsubq_f64 (z, d->shift);
/* r = x - n*ln2/N. */
- float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0);
- r = vfmaq_laneq_f64 (r, n, d->ln2, 1);
+ float64x2_t ln2 = vld1q_f64 (d->ln2);
+ float64x2_t r = vfmaq_laneq_f64 (x, n, ln2, 0);
+ r = vfmaq_laneq_f64 (r, n, ln2, 1);
uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
uint64x2_t i = vandq_u64 (u, d->index_mask);
diff --git a/sysdeps/aarch64/fpu/erf_advsimd.c b/sysdeps/aarch64/fpu/erf_advsimd.c
index 3e70cbc025248a05..19cbb7d0f42eb4e2 100644
--- a/sysdeps/aarch64/fpu/erf_advsimd.c
+++ b/sysdeps/aarch64/fpu/erf_advsimd.c
@@ -56,8 +56,8 @@ static inline struct entry
lookup (uint64x2_t i)
{
struct entry e;
- float64x2_t e1 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[0])),
- e2 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[1]));
+ float64x2_t e1 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 0)].erf),
+ e2 = vld1q_f64 (&__erf_data.tab[vgetq_lane_u64 (i, 1)].erf);
e.erf = vuzp1q_f64 (e1, e2);
e.scale = vuzp2q_f64 (e1, e2);
return e;
diff --git a/sysdeps/aarch64/fpu/erfc_advsimd.c b/sysdeps/aarch64/fpu/erfc_advsimd.c
index 548f21a3d68d68d2..f1b3bfe8304c73b5 100644
--- a/sysdeps/aarch64/fpu/erfc_advsimd.c
+++ b/sysdeps/aarch64/fpu/erfc_advsimd.c
@@ -26,7 +26,7 @@ static const struct data
float64x2_t max, shift;
float64x2_t p20, p40, p41, p42;
float64x2_t p51, p52;
- float64x2_t qr5, qr6, qr7, qr8, qr9;
+ double qr5[2], qr6[2], qr7[2], qr8[2], qr9[2];
#if WANT_SIMD_EXCEPT
float64x2_t uflow_bound;
#endif
@@ -68,8 +68,10 @@ static inline struct entry
lookup (uint64x2_t i)
{
struct entry e;
- float64x2_t e1 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[0])),
- e2 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[1]));
+ float64x2_t e1
+ = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 0) - Off].erfc);
+ float64x2_t e2
+ = vld1q_f64 (&__erfc_data.tab[vgetq_lane_u64 (i, 1) - Off].erfc);
e.erfc = vuzp1q_f64 (e1, e2);
e.scale = vuzp2q_f64 (e1, e2);
return e;
@@ -161,16 +163,19 @@ float64x2_t V_NAME_D1 (erfc) (float64x2_t x)
p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5));
/* Compute p_i using recurrence relation:
p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */
- float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, dat->qr5, 0));
- p6 = vmulq_laneq_f64 (p6, dat->qr5, 1);
- float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, dat->qr6, 0));
- p7 = vmulq_laneq_f64 (p7, dat->qr6, 1);
- float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, dat->qr7, 0));
- p8 = vmulq_laneq_f64 (p8, dat->qr7, 1);
- float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, dat->qr8, 0));
- p9 = vmulq_laneq_f64 (p9, dat->qr8, 1);
- float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, dat->qr9, 0));
- p10 = vmulq_laneq_f64 (p10, dat->qr9, 1);
+ float64x2_t qr5 = vld1q_f64 (dat->qr5), qr6 = vld1q_f64 (dat->qr6),
+ qr7 = vld1q_f64 (dat->qr7), qr8 = vld1q_f64 (dat->qr8),
+ qr9 = vld1q_f64 (dat->qr9);
+ float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, qr5, 0));
+ p6 = vmulq_laneq_f64 (p6, qr5, 1);
+ float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, qr6, 0));
+ p7 = vmulq_laneq_f64 (p7, qr6, 1);
+ float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, qr7, 0));
+ p8 = vmulq_laneq_f64 (p8, qr7, 1);
+ float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, qr8, 0));
+ p9 = vmulq_laneq_f64 (p9, qr8, 1);
+ float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, qr9, 0));
+ p10 = vmulq_laneq_f64 (p10, qr9, 1);
/* Compute polynomial in d using pairwise Horner scheme. */
float64x2_t p90 = vfmaq_f64 (p9, d, p10);
float64x2_t p78 = vfmaq_f64 (p7, d, p8);
diff --git a/sysdeps/aarch64/fpu/erfcf_advsimd.c b/sysdeps/aarch64/fpu/erfcf_advsimd.c
index 30b9e48dd40d80a0..ca5bc3ab33c92f83 100644
--- a/sysdeps/aarch64/fpu/erfcf_advsimd.c
+++ b/sysdeps/aarch64/fpu/erfcf_advsimd.c
@@ -23,7 +23,8 @@ static const struct data
{
uint32x4_t offset, table_scale;
float32x4_t max, shift;
- float32x4_t coeffs, third, two_over_five, tenth;
+ float coeffs[4];
+ float32x4_t third, two_over_five, tenth;
#if WANT_SIMD_EXCEPT
float32x4_t uflow_bound;
#endif
@@ -37,7 +38,7 @@ static const struct data
.shift = V4 (0x1p17f),
/* Store 1/3, 2/3 and 2/15 in a single register for use with indexed muls and
fmas. */
- .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
+ .coeffs = { 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
.third = V4 (0x1.555556p-2f),
.two_over_five = V4 (-0x1.99999ap-2f),
.tenth = V4 (-0x1.99999ap-4f),
@@ -60,12 +61,16 @@ static inline struct entry
lookup (uint32x4_t i)
{
struct entry e;
- float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0]));
- float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1]));
- float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2]));
- float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3]));
- float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
- float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+ float32x2_t t0
+ = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 0) - Off].erfc);
+ float32x2_t t1
+ = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 1) - Off].erfc);
+ float32x2_t t2
+ = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 2) - Off].erfc);
+ float32x2_t t3
+ = vld1_f32 (&__erfcf_data.tab[vgetq_lane_u32 (i, 3) - Off].erfc);
+ float32x4_t e1 = vcombine_f32 (t0, t1);
+ float32x4_t e2 = vcombine_f32 (t2, t3);
e.erfc = vuzp1q_f32 (e1, e2);
e.scale = vuzp2q_f32 (e1, e2);
return e;
@@ -140,10 +145,11 @@ float32x4_t NOINLINE V_NAME_F1 (erfc) (float32x4_t x)
float32x4_t r2 = vmulq_f32 (r, r);
float32x4_t p1 = r;
- float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1);
+ float32x4_t coeffs = vld1q_f32 (dat->coeffs);
+ float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, coeffs, 1);
float32x4_t p3
- = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0));
- float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2);
+ = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, coeffs, 0));
+ float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, coeffs, 2);
p4 = vfmsq_f32 (dat->tenth, r2, p4);
float32x4_t y = vfmaq_f32 (p3, d, p4);
diff --git a/sysdeps/aarch64/fpu/erff_advsimd.c b/sysdeps/aarch64/fpu/erff_advsimd.c
index c44644a71cffbb62..f2fe6ff236a6ec07 100644
--- a/sysdeps/aarch64/fpu/erff_advsimd.c
+++ b/sysdeps/aarch64/fpu/erff_advsimd.c
@@ -47,12 +47,12 @@ static inline struct entry
lookup (uint32x4_t i)
{
struct entry e;
- float64_t t0 = *((float64_t *) (__erff_data.tab + i[0]));
- float64_t t1 = *((float64_t *) (__erff_data.tab + i[1]));
- float64_t t2 = *((float64_t *) (__erff_data.tab + i[2]));
- float64_t t3 = *((float64_t *) (__erff_data.tab + i[3]));
- float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
- float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+ float32x2_t t0 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 0)].erf);
+ float32x2_t t1 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 1)].erf);
+ float32x2_t t2 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 2)].erf);
+ float32x2_t t3 = vld1_f32 (&__erff_data.tab[vgetq_lane_u32 (i, 3)].erf);
+ float32x4_t e1 = vcombine_f32 (t0, t1);
+ float32x4_t e2 = vcombine_f32 (t2, t3);
e.erf = vuzp1q_f32 (e1, e2);
e.scale = vuzp2q_f32 (e1, e2);
return e;
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
index fa3723b10c15eb29..3e3b76c502b01e16 100644
--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
@@ -22,8 +22,9 @@
static const struct data
{
- float64x2_t poly[11];
- float64x2_t inv_ln2, m_ln2, shift;
+ float64x2_t poly[11], inv_ln2;
+ double m_ln2[2];
+ float64x2_t shift;
uint64x2_t halff;
int64x2_t onef;
#if WANT_SIMD_EXCEPT
@@ -40,7 +41,7 @@ static const struct data
V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
.inv_ln2 = V2 (0x1.71547652b82fep0),
- .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
+ .m_ln2 = {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
.shift = V2 (0x1.8p52),
.halff = V2 (0x3fe0000000000000),
@@ -67,8 +68,10 @@ expm1_inline (float64x2_t x)
and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
int64x2_t i = vcvtq_s64_f64 (j);
- float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0);
- f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1);
+
+ float64x2_t m_ln2 = vld1q_f64 (d->m_ln2);
+ float64x2_t f = vfmaq_laneq_f64 (x, j, m_ln2, 0);
+ f = vfmaq_laneq_f64 (f, j, m_ln2, 1);
/* Approximate expm1(f) using polynomial. */
float64x2_t f2 = vmulq_f64 (f, f);
float64x2_t f4 = vmulq_f64 (f2, f2);
diff --git a/sysdeps/aarch64/fpu/v_expf_inline.h b/sysdeps/aarch64/fpu/v_expf_inline.h
index a3b0e32f9eb42021..08b06e0a6b34b4f4 100644
--- a/sysdeps/aarch64/fpu/v_expf_inline.h
+++ b/sysdeps/aarch64/fpu/v_expf_inline.h
@@ -25,7 +25,8 @@
struct v_expf_data
{
float32x4_t poly[5];
- float32x4_t shift, invln2_and_ln2;
+ float32x4_t shift;
+ float invln2_and_ln2[4];
};
/* maxerr: 1.45358 +0.5 ulp. */
@@ -50,10 +51,11 @@ v_expf_inline (float32x4_t x, const struct v_expf_data *d)
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
float32x4_t n, r, z;
- z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0);
+ float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
+ z = vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0);
n = vsubq_f32 (z, d->shift);
- r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1);
- r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2);
+ r = vfmsq_laneq_f32 (x, n, invln2_and_ln2, 1);
+ r = vfmsq_laneq_f32 (r, n, invln2_and_ln2, 2);
uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
diff --git a/sysdeps/aarch64/fpu/v_expm1f_inline.h b/sysdeps/aarch64/fpu/v_expm1f_inline.h
index 337ccfbfab555c97..59b552da6b74785e 100644
--- a/sysdeps/aarch64/fpu/v_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/v_expm1f_inline.h
@@ -26,7 +26,8 @@
struct v_expm1f_data
{
float32x4_t poly[5];
- float32x4_t invln2_and_ln2, shift;
+ float invln2_and_ln2[4];
+ float32x4_t shift;
int32x4_t exponent_bias;
};
@@ -49,11 +50,12 @@ expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
calling routine should handle special values if required. */
/* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
- float32x4_t j = vsubq_f32 (
- vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
+ float32x4_t invln2_and_ln2 = vld1q_f32 (d->invln2_and_ln2);
+ float32x4_t j
+ = vsubq_f32 (vfmaq_laneq_f32 (d->shift, x, invln2_and_ln2, 0), d->shift);
int32x4_t i = vcvtq_s32_f32 (j);
- float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
- f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
+ float32x4_t f = vfmsq_laneq_f32 (x, j, invln2_and_ln2, 1);
+ f = vfmsq_laneq_f32 (f, j, invln2_and_ln2, 2);
/* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses

70
glibc-RHEL-119386-1.patch Normal file
View File

@ -0,0 +1,70 @@
commit 255df9299f544ad9e027e0c8d6b65b0635c59f8c
Author: Samuel Dobron <sdobron@redhat.com>
Date: Thu Jul 11 05:31:11 2024 +0200
time/Makefile: Split and sort tests
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
diff --git a/time/Makefile b/time/Makefile
index 5b541fb9d3be1c28..059d85c151401587 100644
--- a/time/Makefile
+++ b/time/Makefile
@@ -42,15 +42,48 @@ routines := offtime asctime clock ctime ctime_r difftime \
aux := era alt_digit lc-time-cleanup
-tests := test_time clocktest tst-posixtz tst-strptime tst_wcsftime \
- tst-getdate tst-mktime tst-mktime2 tst-ftime_l tst-strftime \
- tst-mktime3 tst-strptime2 bug-asctime bug-asctime_r bug-mktime1 \
- tst-strptime3 bug-getdate1 tst-strptime-whitespace tst-ftime \
- tst-tzname tst-y2039 bug-mktime4 tst-strftime2 tst-strftime3 \
- tst-clock tst-clock2 tst-clock_nanosleep tst-cpuclock1 \
- tst-adjtime tst-ctime tst-difftime tst-mktime4 tst-clock_settime \
- tst-settimeofday tst-itimer tst-gmtime tst-timegm \
- tst-timespec_get tst-timespec_getres tst-strftime4
+tests := \
+ bug-asctime \
+ bug-asctime_r \
+ bug-getdate1 \
+ bug-mktime1 \
+ bug-mktime4 \
+ clocktest \
+ test_time \
+ tst-adjtime \
+ tst-clock \
+ tst-clock2 \
+ tst-clock_nanosleep \
+ tst-clock_settime \
+ tst-cpuclock1 \
+ tst-ctime \
+ tst-difftime \
+ tst-ftime \
+ tst-ftime_l \
+ tst-getdate \
+ tst-gmtime \
+ tst-itimer \
+ tst-mktime \
+ tst-mktime2 \
+ tst-mktime3 \
+ tst-mktime4 \
+ tst-posixtz \
+ tst-settimeofday \
+ tst-strftime \
+ tst-strftime2 \
+ tst-strftime3 \
+ tst-strftime4 \
+ tst-strptime \
+ tst-strptime-whitespace \
+ tst-strptime2 \
+ tst-strptime3 \
+ tst-timegm \
+ tst-timespec_get \
+ tst-timespec_getres \
+ tst-tzname \
+ tst-y2039 \
+ tst_wcsftime \
+ # tests
tests-time64 := \
tst-adjtime-time64 \

428
glibc-RHEL-119386-2.patch Normal file
View File

@ -0,0 +1,428 @@
commit e5ea9aef5468404eecc8c990e6852315b7d1a0e3
Author: Joseph Myers <josmyers@redhat.com>
Date: Wed Oct 30 16:48:38 2024 +0000
Add tests of time, gettimeofday, clock_gettime
There are no tests specifically focused on the functions time,
gettimeofday and clock_gettime, although there are some incidental
uses in tests of other functions. Add tests specifically for these
three functions.
Tested for x86_64 and x86.
diff --git a/time/Makefile b/time/Makefile
index 059d85c151401587..b57963c1c6443770 100644
--- a/time/Makefile
+++ b/time/Makefile
@@ -53,6 +53,7 @@ tests := \
tst-adjtime \
tst-clock \
tst-clock2 \
+ tst-clock_gettime \
tst-clock_nanosleep \
tst-clock_settime \
tst-cpuclock1 \
@@ -61,6 +62,7 @@ tests := \
tst-ftime \
tst-ftime_l \
tst-getdate \
+ tst-gettimeofday \
tst-gmtime \
tst-itimer \
tst-mktime \
@@ -77,6 +79,7 @@ tests := \
tst-strptime-whitespace \
tst-strptime2 \
tst-strptime3 \
+ tst-time \
tst-timegm \
tst-timespec_get \
tst-timespec_getres \
@@ -89,16 +92,19 @@ tests-time64 := \
tst-adjtime-time64 \
tst-clock-time64 \
tst-clock2-time64 \
+ tst-clock_gettime-time64 \
tst-clock_nanosleep-time64 \
tst-clock_settime-time64 \
tst-cpuclock1-time64 \
tst-ctime-time64 \
tst-difftime-time64 \
+ tst-gettimeofday-time64 \
tst-gmtime-time64 \
tst-itimer-time64 \
tst-mktime4-time64 \
tst-settimeofday-time64 \
tst-strftime4-time64 \
+ tst-time-time64 \
tst-timegm-time64 \
tst-timespec_get-time64 \
tst-timespec_getres-time64 \
diff --git a/time/tst-clock_gettime-time64.c b/time/tst-clock_gettime-time64.c
new file mode 100644
index 0000000000000000..5b215d11f8a0a424
--- /dev/null
+++ b/time/tst-clock_gettime-time64.c
@@ -0,0 +1 @@
+#include "tst-clock_gettime.c"
diff --git a/time/tst-clock_gettime.c b/time/tst-clock_gettime.c
new file mode 100644
index 0000000000000000..51f24c0be2084a91
--- /dev/null
+++ b/time/tst-clock_gettime.c
@@ -0,0 +1,184 @@
+/* Test clock_gettime function.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <time.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+#include <support/xsignal.h>
+
+/* Compare two struct timespec values, returning a value -1, 0 or 1. */
+
+int
+compare_timespec (const struct timespec *tv1, const struct timespec *tv2)
+{
+ if (tv1->tv_sec < tv2->tv_sec)
+ return -1;
+ if (tv1->tv_sec > tv2->tv_sec)
+ return 1;
+ if (tv1->tv_nsec < tv2->tv_nsec)
+ return -1;
+ if (tv1->tv_nsec > tv2->tv_nsec)
+ return 1;
+ return 0;
+}
+
+struct test_clockid
+{
+ clockid_t clockid;
+ const char *name;
+ bool is_cputime;
+ bool fail_ok;
+};
+
+#define CLOCK(clockid) { clockid, # clockid, false, false }
+#define CLOCK_CPU(clockid) { clockid, # clockid, true, false }
+#define CLOCK_FAIL_OK(clockid) { clockid, # clockid, false, true }
+
+static const struct test_clockid clocks[] =
+ {
+ CLOCK (CLOCK_REALTIME),
+#ifdef CLOCK_MONOTONIC
+ CLOCK (CLOCK_MONOTONIC),
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_CPU (CLOCK_PROCESS_CPUTIME_ID),
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_CPU (CLOCK_THREAD_CPUTIME_ID),
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK (CLOCK_MONOTONIC_RAW),
+#endif
+#ifdef CLOCK_REALTIME_COARSE
+ CLOCK (CLOCK_REALTIME_COARSE),
+#endif
+#ifdef CLOCK_MONOTONIC_COARSE
+ CLOCK (CLOCK_MONOTONIC_COARSE),
+#endif
+#ifdef CLOCK_BOOTTIME
+ CLOCK (CLOCK_BOOTTIME),
+#endif
+#ifdef CLOCK_REALTIME_ALARM
+ CLOCK_FAIL_OK (CLOCK_REALTIME_ALARM),
+#endif
+#ifdef CLOCK_BOOTTIME_ALARM
+ CLOCK_FAIL_OK (CLOCK_BOOTTIME_ALARM),
+#endif
+#ifdef CLOCK_TAI
+ CLOCK (CLOCK_TAI),
+#endif
+ };
+
+
+volatile sig_atomic_t sigalrm_received; /* Set to 1 by handle_sigalrm.  */
+/* SIGALRM handler: only records that the signal was delivered.  */
+void
+handle_sigalrm (int sig)
+{
+  sigalrm_received = 1;
+}
+
+int
+do_test (void)
+{
+ /* Verify that the calls to clock_gettime succeed, that the time does
+ not decrease, and that time returns a truncated (not rounded)
+ version of the time. */
+ for (size_t i = 0; i < sizeof clocks / sizeof clocks[0]; i++)
+ {
+ printf ("testing %s\n", clocks[i].name);
+ struct timespec ts1, ts2, ts3;
+ int ret;
+ time_t t1;
+ t1 = time (NULL);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ ret = clock_gettime (clocks[i].clockid, &ts1);
+ if (clocks[i].fail_ok && ret == -1)
+ {
+ printf ("failed (OK for this clock): %m\n");
+ continue;
+ }
+ TEST_VERIFY_EXIT (ret == 0);
+ if (clocks[i].clockid == CLOCK_REALTIME)
+ TEST_VERIFY (t1 <= ts1.tv_sec);
+ TEST_VERIFY (ts1.tv_nsec >= 0);
+ TEST_VERIFY (ts1.tv_nsec < 1000000000);
+ ret = clock_gettime (clocks[i].clockid, &ts2);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (compare_timespec (&ts1, &ts2) <= 0);
+ TEST_VERIFY (ts2.tv_nsec >= 0);
+ TEST_VERIFY (ts2.tv_nsec < 1000000000);
+ /* Also verify that after sleeping, the time returned has
+ increased. Repeat several times to verify that each time,
+ the time from the time function is truncated not rounded.
+ For CPU time clocks, the time spent spinning on the CPU, and
+ so whether we end in the later half of a second, is not
+ predictable; thus, only test once for those clocks. */
+ const struct timespec duration = { .tv_nsec = 100000000 };
+ for (int j = 0; j < 5; j++)
+ {
+ if (clocks[i].is_cputime)
+ {
+ timer_t timer;
+ ret = timer_create (CLOCK_PROCESS_CPUTIME_ID, NULL, &timer);
+ TEST_VERIFY_EXIT (ret == 0);
+ sigalrm_received = 0;
+ xsignal (SIGALRM, handle_sigalrm);
+ struct itimerspec t =
+ { .it_value =
+ {
+ .tv_sec = 0,
+ .tv_nsec = 200000000
+ }
+ };
+ ret = timer_settime (timer, 0, &t, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ while (sigalrm_received == 0)
+ ;
+ xsignal (SIGALRM, SIG_DFL);
+ ret = timer_delete (timer);
+ TEST_VERIFY_EXIT (ret == 0);
+ }
+ else
+ {
+ ret = nanosleep (&duration, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ }
+ t1 = time (NULL);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ ret = clock_gettime (clocks[i].clockid, &ts3);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (compare_timespec (&ts2, &ts3) < 0);
+ if (clocks[i].clockid == CLOCK_REALTIME)
+ TEST_VERIFY (t1 <= ts3.tv_sec);
+ TEST_VERIFY (ts3.tv_nsec >= 0);
+ TEST_VERIFY (ts3.tv_nsec < 1000000000);
+ ts2 = ts3;
+ if (clocks[i].is_cputime)
+ break;
+ }
+ }
+ return 0;
+}
+
+#define TIMEOUT 60
+
+#include <support/test-driver.c>
diff --git a/time/tst-gettimeofday-time64.c b/time/tst-gettimeofday-time64.c
new file mode 100644
index 0000000000000000..6c08761ef995ce7c
--- /dev/null
+++ b/time/tst-gettimeofday-time64.c
@@ -0,0 +1 @@
+#include "tst-gettimeofday.c"
diff --git a/time/tst-gettimeofday.c b/time/tst-gettimeofday.c
new file mode 100644
index 0000000000000000..978ae28587d486f2
--- /dev/null
+++ b/time/tst-gettimeofday.c
@@ -0,0 +1,93 @@
+/* Test gettimeofday function.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sys/time.h>
+#include <time.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+
+/* Compare two struct timeval values, returning a value -1, 0 or 1. */
+
+int
+compare_timeval (const struct timeval *tv1, const struct timeval *tv2)
+{
+ if (tv1->tv_sec < tv2->tv_sec)
+ return -1;
+ if (tv1->tv_sec > tv2->tv_sec)
+ return 1;
+ if (tv1->tv_usec < tv2->tv_usec)
+ return -1;
+ if (tv1->tv_usec > tv2->tv_usec)
+ return 1;
+ return 0;
+}
+
+int
+do_test (void)
+{
+ struct timeval tv1, tv2, tv3;
+ int ret;
+ time_t t1;
+ /* Verify that the calls to gettimeofday succeed, that the time does
+ not decrease, and that time returns a truncated (not rounded)
+ version of the time. */
+ t1 = time (NULL);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ ret = gettimeofday (&tv1, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (t1 <= tv1.tv_sec);
+ TEST_VERIFY (tv1.tv_usec >= 0);
+ TEST_VERIFY (tv1.tv_usec < 1000000);
+ ret = gettimeofday (&tv2, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (compare_timeval (&tv1, &tv2) <= 0);
+ TEST_VERIFY (tv2.tv_usec >= 0);
+ TEST_VERIFY (tv2.tv_usec < 1000000);
+ /* Also verify that after sleeping, the time returned has increased.
+ Repeat several times to verify that each time, the time from the
+ time function is truncated not rounded. */
+ const struct timespec duration = { .tv_nsec = 100000000 };
+ for (int i = 0; i < 10; i++)
+ {
+ ret = nanosleep (&duration, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ t1 = time (NULL);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ ret = gettimeofday (&tv3, NULL);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (compare_timeval (&tv2, &tv3) < 0);
+ TEST_VERIFY (t1 <= tv3.tv_sec);
+ TEST_VERIFY (tv3.tv_usec >= 0);
+ TEST_VERIFY (tv3.tv_usec < 1000000);
+ tv2 = tv3;
+ }
+ /* Also test with the obsolete tz argument not being NULL. */
+ struct timezone tz = { 0 };
+ t1 = time (NULL);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ ret = gettimeofday (&tv3, &tz);
+ TEST_VERIFY_EXIT (ret == 0);
+ TEST_VERIFY (t1 <= tv3.tv_sec);
+ TEST_VERIFY (compare_timeval (&tv2, &tv3) <= 0);
+ TEST_VERIFY (tv3.tv_usec >= 0);
+ TEST_VERIFY (tv3.tv_usec < 1000000);
+ return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/time/tst-time-time64.c b/time/tst-time-time64.c
new file mode 100644
index 0000000000000000..30e8d3c86ef973cc
--- /dev/null
+++ b/time/tst-time-time64.c
@@ -0,0 +1 @@
+#include "tst-time.c"
diff --git a/time/tst-time.c b/time/tst-time.c
new file mode 100644
index 0000000000000000..7f24bed3530e1c1e
--- /dev/null
+++ b/time/tst-time.c
@@ -0,0 +1,51 @@
+/* Test time function.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <time.h>
+#include <unistd.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+
+int
+do_test (void)
+{
+ time_t t1, t2, t3, t4, t5, t6;
+ /* Verify that the calls to time succeed, that the value returned
+ directly equals that returned through the pointer passed, and
+ that the time does not decrease. */
+ t1 = time (&t2);
+ TEST_VERIFY_EXIT (t1 != (time_t) -1);
+ TEST_VERIFY (t1 == t2);
+ t3 = time (NULL);
+ TEST_VERIFY_EXIT (t3 != (time_t) -1);
+ TEST_VERIFY (t3 >= t1);
+ /* Also verify that after sleeping, the time returned has
+ increased. */
+ sleep (2);
+ t4 = time (&t5);
+ TEST_VERIFY_EXIT (t4 != (time_t) -1);
+ TEST_VERIFY (t4 == t5);
+ TEST_VERIFY (t4 > t3);
+ t6 = time (NULL);
+ TEST_VERIFY_EXIT (t6 != (time_t) -1);
+ TEST_VERIFY (t6 >= t4);
+ return 0;
+}
+
+#include <support/test-driver.c>

24
glibc-RHEL-119386-3.patch Normal file
View File

@ -0,0 +1,24 @@
commit 9c0903fb7388f645d23b26160ed3669a116189fe
Author: Joseph Myers <josmyers@redhat.com>
Date: Thu Oct 31 17:43:52 2024 +0000
Link tst-clock_gettime with $(librt)
This is needed to avoid link failures for the timer_* functions on
Hurd.
Tested with build-many-glibcs.py for i686-gnu.
diff --git a/time/Makefile b/time/Makefile
index b57963c1c6443770..77a6ded3a5028746 100644
--- a/time/Makefile
+++ b/time/Makefile
@@ -126,6 +126,8 @@ $(objpfx)tst-strftime2.out: $(gen-locales)
$(objpfx)tst-strftime3.out: $(gen-locales)
endif
+$(objpfx)tst-clock_gettime: $(librt)
+$(objpfx)tst-clock_gettime-time64: $(librt)
$(objpfx)tst-clock_nanosleep: $(librt)
$(objpfx)tst-clock_nanosleep-time64: $(librt)

2531
glibc-RHEL-119390-1.patch Normal file

File diff suppressed because it is too large Load Diff

428
glibc-RHEL-119390-10.patch Normal file
View File

@ -0,0 +1,428 @@
commit 6018ba05c01b1e17d77742a123e8c443f8fc713c
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vfprintf output specifiers
Wire vfprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 01018c53b0f4a13f..65f3ddc13141e4d6 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn v vas vd
+xprintf-funcs := p as d f s sn v vas vd vf
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-vf-c.c b/stdio-common/tst-printf-format-vf-c.c
new file mode 100644
index 0000000000000000..b31b551327bb4ebe
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-vf-char.c b/stdio-common/tst-printf-format-vf-char.c
new file mode 100644
index 0000000000000000..daa2886adb9f1636
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-vf-double.c b/stdio-common/tst-printf-format-vf-double.c
new file mode 100644
index 0000000000000000..63ec8c0cae6c272e
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-vf-int.c b/stdio-common/tst-printf-format-vf-int.c
new file mode 100644
index 0000000000000000..e687099b1052d040
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-vf-ldouble.c b/stdio-common/tst-printf-format-vf-ldouble.c
new file mode 100644
index 0000000000000000..801e359dab8ca159
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-vf-llong.c b/stdio-common/tst-printf-format-vf-llong.c
new file mode 100644
index 0000000000000000..a1b9ae340c4aa5ff
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-vf-long.c b/stdio-common/tst-printf-format-vf-long.c
new file mode 100644
index 0000000000000000..7afc127b844d1e10
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-vf-s.c b/stdio-common/tst-printf-format-vf-s.c
new file mode 100644
index 0000000000000000..6faa6d0dfe4b4132
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-vf-short.c b/stdio-common/tst-printf-format-vf-short.c
new file mode 100644
index 0000000000000000..c3d17ca501974f83
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-vf-uchar.c b/stdio-common/tst-printf-format-vf-uchar.c
new file mode 100644
index 0000000000000000..643438c693abf51f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-vf-uint.c b/stdio-common/tst-printf-format-vf-uint.c
new file mode 100644
index 0000000000000000..844192cc9ccf5db8
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-vf-ullong.c b/stdio-common/tst-printf-format-vf-ullong.c
new file mode 100644
index 0000000000000000..ab58abd7f8c4e267
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-vf-ulong.c b/stdio-common/tst-printf-format-vf-ulong.c
new file mode 100644
index 0000000000000000..e76251f9d0e3337f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-vf-ushort.c b/stdio-common/tst-printf-format-vf-ushort.c
new file mode 100644
index 0000000000000000..bf78a919a6599054
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vfprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vf.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-vf.h b/stdio-common/tst-printf-format-vf.h
new file mode 100644
index 0000000000000000..f824364f4c041ab8
--- /dev/null
+++ b/stdio-common/tst-printf-format-vf.h
@@ -0,0 +1,34 @@
+/* Test feature wrapper for formatted 'vfprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static int
+printf_under_test (const char *restrict fmt, ...) /* Format FMT and its arguments with vfprintf onto stdout.  */
+{
+ va_list ap;
+ int result; /* Value returned by vfprintf, handed back unchanged.  */
+
+ va_start (ap, fmt);
+ result = vfprintf (stdout, fmt, ap);
+ va_end (ap);
+ if (result < 0)
+ perror ("vfprintf"); /* Report the failure; the negative value is still returned.  */
+ return result;
+}

458
glibc-RHEL-119390-11.patch Normal file
View File

@ -0,0 +1,458 @@
commit ac72dd90905e1693c108c9f36f0c7e79d6ad5501
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vsprintf output specifiers
Wire vsprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 65f3ddc13141e4d6..529fc90945524a7b 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn v vas vd vf
+xprintf-funcs := p as d f s sn v vas vd vf vs
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-vs-c.c b/stdio-common/tst-printf-format-vs-c.c
new file mode 100644
index 0000000000000000..72bcb5f04957c4ef
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-vs-char.c b/stdio-common/tst-printf-format-vs-char.c
new file mode 100644
index 0000000000000000..30135cf1be1616ac
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-vs-double.c b/stdio-common/tst-printf-format-vs-double.c
new file mode 100644
index 0000000000000000..56290d383ebc33c8
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-vs-int.c b/stdio-common/tst-printf-format-vs-int.c
new file mode 100644
index 0000000000000000..f954e1f4f8277c64
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-vs-ldouble.c b/stdio-common/tst-printf-format-vs-ldouble.c
new file mode 100644
index 0000000000000000..3088e42813abd537
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-vs-llong.c b/stdio-common/tst-printf-format-vs-llong.c
new file mode 100644
index 0000000000000000..348ec2c3d73b8f88
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-vs-long.c b/stdio-common/tst-printf-format-vs-long.c
new file mode 100644
index 0000000000000000..874e3ba479eda8fb
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-vs-s.c b/stdio-common/tst-printf-format-vs-s.c
new file mode 100644
index 0000000000000000..051f1b79bf2cb028
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-vs-short.c b/stdio-common/tst-printf-format-vs-short.c
new file mode 100644
index 0000000000000000..36595a82b445e8a3
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-vs-uchar.c b/stdio-common/tst-printf-format-vs-uchar.c
new file mode 100644
index 0000000000000000..8e35614110bd7d57
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-vs-uint.c b/stdio-common/tst-printf-format-vs-uint.c
new file mode 100644
index 0000000000000000..4a13d6c409ad4245
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-vs-ullong.c b/stdio-common/tst-printf-format-vs-ullong.c
new file mode 100644
index 0000000000000000..313dfaf02bda3059
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-vs-ulong.c b/stdio-common/tst-printf-format-vs-ulong.c
new file mode 100644
index 0000000000000000..5ab7e2e7fe9b237a
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-vs-ushort.c b/stdio-common/tst-printf-format-vs-ushort.c
new file mode 100644
index 0000000000000000..a4af138a2f8485ab
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vs.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-vs.h b/stdio-common/tst-printf-format-vs.h
new file mode 100644
index 0000000000000000..e00e1b085bc58150
--- /dev/null
+++ b/stdio-common/tst-printf-format-vs.h
@@ -0,0 +1,64 @@
+/* Test feature wrapper for formatted 'vsprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/next_to_fault.h>
+
+#define SPRINTF_BUFFER_SIZE 65536 /* Size requested from support_next_to_fault_allocate.  */
+
+static struct support_next_to_fault ntf; /* Allocation whose buffer member receives vsprintf output.  */
+
+#define PREPARE printf_under_test_init /* NOTE(review): presumably the setup hook read by the included test skeleton -- confirm.  */
+static void
+printf_under_test_init (int argc, char **argv) /* Allocate the output buffer into ntf; ARGC and ARGV are unused.  */
+{
+ ntf = support_next_to_fault_allocate (SPRINTF_BUFFER_SIZE);
+}
+
+static void __attribute__ ((destructor)) /* Marked as a destructor; releases the allocation held in ntf.  */
+printf_under_test_fini (void)
+{
+ support_next_to_fault_free (&ntf);
+}
+
+static int
+printf_under_test (const char *restrict fmt, ...) /* Format with vsprintf into ntf.buffer, then copy the text to stdout.  */
+{
+ char *str = ntf.buffer; /* Destination buffer; ntf is assigned in printf_under_test_init.  */
+ va_list ap;
+ int result;
+
+ va_start (ap, fmt);
+ result = vsprintf (str, fmt, ap);
+ va_end (ap);
+ if (result < 0)
+ {
+ perror ("vsprintf");
+ goto out; /* Nothing to write; return the negative result as is.  */
+ }
+ if (fwrite (str, sizeof (*str), result, stdout) != result) /* Write exactly RESULT characters of STR.  */
+ {
+ perror ("fwrite");
+ result = -1; /* Turn a short write into a failure return.  */
+ }
+out:
+ return result;
+}

458
glibc-RHEL-119390-12.patch Normal file
View File

@ -0,0 +1,458 @@
commit 11a2169e4066e6b848f1e6e4c31ec4e2210cecd8
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vsnprintf output specifiers
Wire vsnprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 529fc90945524a7b..cb86a6259ff5823b 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn v vas vd vf vs
+xprintf-funcs := p as d f s sn v vas vd vf vs vsn
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-vsn-c.c b/stdio-common/tst-printf-format-vsn-c.c
new file mode 100644
index 0000000000000000..47c8a0f5dfbddb49
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-vsn-char.c b/stdio-common/tst-printf-format-vsn-char.c
new file mode 100644
index 0000000000000000..48d4393a46d80e33
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-vsn-double.c b/stdio-common/tst-printf-format-vsn-double.c
new file mode 100644
index 0000000000000000..06c1003fb5fde4b0
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-vsn-int.c b/stdio-common/tst-printf-format-vsn-int.c
new file mode 100644
index 0000000000000000..2aae92616f2f6007
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-vsn-ldouble.c b/stdio-common/tst-printf-format-vsn-ldouble.c
new file mode 100644
index 0000000000000000..0b5aafb124307526
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-vsn-llong.c b/stdio-common/tst-printf-format-vsn-llong.c
new file mode 100644
index 0000000000000000..8e79b8384f6858c6
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-vsn-long.c b/stdio-common/tst-printf-format-vsn-long.c
new file mode 100644
index 0000000000000000..e94f7dec23ece2ca
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-vsn-s.c b/stdio-common/tst-printf-format-vsn-s.c
new file mode 100644
index 0000000000000000..efd8a4c23f5f42e6
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-vsn-short.c b/stdio-common/tst-printf-format-vsn-short.c
new file mode 100644
index 0000000000000000..3d375b59e7a990f0
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-vsn-uchar.c b/stdio-common/tst-printf-format-vsn-uchar.c
new file mode 100644
index 0000000000000000..6d0f396481d3e2d6
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-vsn-uint.c b/stdio-common/tst-printf-format-vsn-uint.c
new file mode 100644
index 0000000000000000..b637f7bdb11f7913
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-vsn-ullong.c b/stdio-common/tst-printf-format-vsn-ullong.c
new file mode 100644
index 0000000000000000..d2442715f087acaf
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-vsn-ulong.c b/stdio-common/tst-printf-format-vsn-ulong.c
new file mode 100644
index 0000000000000000..67417d174043a605
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-vsn-ushort.c b/stdio-common/tst-printf-format-vsn-ushort.c
new file mode 100644
index 0000000000000000..396ea43ae7067c86
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vsnprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vsn.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-vsn.h b/stdio-common/tst-printf-format-vsn.h
new file mode 100644
index 0000000000000000..4f25f1af767dc221
--- /dev/null
+++ b/stdio-common/tst-printf-format-vsn.h
@@ -0,0 +1,64 @@
+/* Test feature wrapper for formatted 'vsnprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/next_to_fault.h>
+
+#define SPRINTF_BUFFER_SIZE 65536
+
+static struct support_next_to_fault ntf;
+
+#define PREPARE printf_under_test_init
+static void /* Set-up hook: this is the function named by the PREPARE macro defined just above. */
+printf_under_test_init (int argc, char **argv) /* ARGC and ARGV are accepted but not used here. */
+{
+ ntf = support_next_to_fault_allocate (SPRINTF_BUFFER_SIZE); /* Keep the allocation in file-scope NTF; printf_under_test formats into it. */
+}
+
+static void __attribute__ ((destructor)) /* Destructor attribute: the compiler arranges for this to run at program exit. */
+printf_under_test_fini (void)
+{
+ support_next_to_fault_free (&ntf); /* Hand the allocation recorded in NTF back to the support library. */
+}
+
+static int /* Returns the length reported by vsnprintf, or a negative value on failure. */
+printf_under_test (const char *restrict fmt, ...) /* Format FMT and its arguments into NTF's buffer, then copy the text to stdout. */
+{
+ char *str = ntf.buffer;
+ va_list ap;
+ int result;
+
+ va_start (ap, fmt);
+ result = vsnprintf (str, ntf.length, fmt, ap); /* Write is bounded by NTF.length; the return value is the length the full output needs. */
+ va_end (ap);
+ if (result < 0)
+ {
+ perror ("vsnprintf");
+ goto out; /* Pass the negative vsnprintf result back unchanged. */
+ }
+ if (fwrite (str, sizeof (*str), result, stdout) != result) /* NOTE(review): if the output was truncated, RESULT >= ntf.length and this reads past the buffer; confirm outputs always fit. */
+ {
+ perror ("fwrite");
+ result = -1; /* A short write is reported to the caller as -1. */
+ }
+out:
+ return result;
+}

459
glibc-RHEL-119390-2.patch Normal file
View File

@ -0,0 +1,459 @@
commit b350a60b6ecd77b7ec30c7969de1df8b73642e55
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted asprintf output specifiers
Wire asprintf into test infrastructure for formatted printf output
specifiers.
Owing to mtrace logging of lots of memory allocation calls these tests
take a considerable amount of time to complete, except for the character
conversion, taking from 00m20s for 'tst-printf-format-as-s --direct s',
through 01m10s and 03m53s for 'tst-printf-format-as-char --direct i' and
'tst-printf-format-as-double --direct f' respectively, to 19m24s for
'tst-printf-format-as-ldouble --direct f', all in standalone execution
from NFS on a RISC-V FU740@1.2GHz system and with output redirected over
100Mbps network via SSH. These timings are with the skeleton's stub
implementation of dladdr(3); with regular dladdr(3) execution can take
more than twice as long.
Set timeouts for the tests accordingly then, with a global default for
all the asprintf tests, and then individual higher settings for double
and long double tests each.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 3e50c5a20ed7e679..54fb75e6232ddc2a 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p
+xprintf-funcs := p as
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-as-c.c b/stdio-common/tst-printf-format-as-c.c
new file mode 100644
index 0000000000000000..9eaf7aec73201b4d
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-as-char.c b/stdio-common/tst-printf-format-as-char.c
new file mode 100644
index 0000000000000000..d9266d5760fb8808
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-as-double.c b/stdio-common/tst-printf-format-as-double.c
new file mode 100644
index 0000000000000000..370ce8c11bd94fe8
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-double.c
@@ -0,0 +1,22 @@
+/* Test for formatted 'asprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define TIMEOUT (DEFAULT_TIMEOUT * 32)
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-as-int.c b/stdio-common/tst-printf-format-as-int.c
new file mode 100644
index 0000000000000000..e6e10a9769f079a4
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-as-ldouble.c b/stdio-common/tst-printf-format-as-ldouble.c
new file mode 100644
index 0000000000000000..e7f72208cec76078
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-ldouble.c
@@ -0,0 +1,22 @@
+/* Test for formatted 'asprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define TIMEOUT (DEFAULT_TIMEOUT * 128)
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-as-llong.c b/stdio-common/tst-printf-format-as-llong.c
new file mode 100644
index 0000000000000000..beaad73c235c344e
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-as-long.c b/stdio-common/tst-printf-format-as-long.c
new file mode 100644
index 0000000000000000..7d968a873c3b5e29
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-as-s.c b/stdio-common/tst-printf-format-as-s.c
new file mode 100644
index 0000000000000000..baa883d5316cbf2f
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-as-short.c b/stdio-common/tst-printf-format-as-short.c
new file mode 100644
index 0000000000000000..8d0b078815f299a6
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-as-uchar.c b/stdio-common/tst-printf-format-as-uchar.c
new file mode 100644
index 0000000000000000..8e46254a2f32b457
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-as-uint.c b/stdio-common/tst-printf-format-as-uint.c
new file mode 100644
index 0000000000000000..8cf38d71a2f53358
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-as-ullong.c b/stdio-common/tst-printf-format-as-ullong.c
new file mode 100644
index 0000000000000000..30b31ed8f7b36ae1
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-as-ulong.c b/stdio-common/tst-printf-format-as-ulong.c
new file mode 100644
index 0000000000000000..9b108aa2b5132686
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-as-ushort.c b/stdio-common/tst-printf-format-as-ushort.c
new file mode 100644
index 0000000000000000..44b912fc38c46d28
--- /dev/null
+++ b/stdio-common/tst-printf-format-as-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'asprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-as.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-as.h b/stdio-common/tst-printf-format-as.h
new file mode 100644
index 0000000000000000..c30d2e2c42822f24
--- /dev/null
+++ b/stdio-common/tst-printf-format-as.h
@@ -0,0 +1,46 @@
+/* Test feature wrapper for formatted 'asprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define printf_under_test(...) /* Format with asprintf, then copy to stdout.  */ \
+({ \
+ __label__ out; \
+ int result; /* Value of the whole statement expression.  */ \
+ char *str; /* Buffer handed back by asprintf; released below.  */ \
+ \
+ result = asprintf (&str, __VA_ARGS__); \
+ if (result < 0) \
+ { \
+ perror ("asprintf"); \
+ goto out; /* Jumps past free: STR is only freed after a successful call.  */ \
+ } \
+ if (fwrite (str, sizeof (*str), result, stdout) != result) \
+ { \
+ perror ("fwrite"); \
+ result = -1; /* Report a short write as a failure.  */ \
+ } \
+ free (str); \
+out: \
+ result; /* Negative on failure, else the asprintf return value.  */ \
+})
+
+#ifndef TIMEOUT /* Keep any TIMEOUT already defined before this point.  */
+# define TIMEOUT (DEFAULT_TIMEOUT * 12) /* Otherwise 12 times DEFAULT_TIMEOUT.  */
+#endif

452
glibc-RHEL-119390-3.patch Normal file
View File

@ -0,0 +1,452 @@
commit b3e8a756ad569fd31181b74b3729d29df3eb55f3
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted dprintf output specifiers
Wire dprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 54fb75e6232ddc2a..850bc89517931652 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as
+xprintf-funcs := p as d
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-d-c.c b/stdio-common/tst-printf-format-d-c.c
new file mode 100644
index 0000000000000000..61fd06654d2a957e
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-d-char.c b/stdio-common/tst-printf-format-d-char.c
new file mode 100644
index 0000000000000000..baa6e1683e923841
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-d-double.c b/stdio-common/tst-printf-format-d-double.c
new file mode 100644
index 0000000000000000..e432a9570d46e776
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-d-int.c b/stdio-common/tst-printf-format-d-int.c
new file mode 100644
index 0000000000000000..6d59b23517b54a85
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-d-ldouble.c b/stdio-common/tst-printf-format-d-ldouble.c
new file mode 100644
index 0000000000000000..67a2bae1bc4e0301
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-d-llong.c b/stdio-common/tst-printf-format-d-llong.c
new file mode 100644
index 0000000000000000..950a2b84b9fc1abc
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-d-long.c b/stdio-common/tst-printf-format-d-long.c
new file mode 100644
index 0000000000000000..4fabb41b0b013011
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-d-s.c b/stdio-common/tst-printf-format-d-s.c
new file mode 100644
index 0000000000000000..1ef896e6e7d146bc
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-d-short.c b/stdio-common/tst-printf-format-d-short.c
new file mode 100644
index 0000000000000000..17767bb30d2f0d3e
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-d-uchar.c b/stdio-common/tst-printf-format-d-uchar.c
new file mode 100644
index 0000000000000000..732479ecab2cdc4e
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-d-uint.c b/stdio-common/tst-printf-format-d-uint.c
new file mode 100644
index 0000000000000000..5b68aec803f653ac
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-d-ullong.c b/stdio-common/tst-printf-format-d-ullong.c
new file mode 100644
index 0000000000000000..0e20a1dccd58e84b
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-d-ulong.c b/stdio-common/tst-printf-format-d-ulong.c
new file mode 100644
index 0000000000000000..62085ace806b6d33
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-d-ushort.c b/stdio-common/tst-printf-format-d-ushort.c
new file mode 100644
index 0000000000000000..7d8ef76d60b70e2d
--- /dev/null
+++ b/stdio-common/tst-printf-format-d-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'dprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-d.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-d.h b/stdio-common/tst-printf-format-d.h
new file mode 100644
index 0000000000000000..af7f26c17bde88ea
--- /dev/null
+++ b/stdio-common/tst-printf-format-d.h
@@ -0,0 +1,58 @@
+/* Test feature wrapper for formatted 'dprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/* We need to go through the POSIX-mandated dance to switch between
+ handles on an open file description. */
+
+#define printf_under_test(...) \
+({ \
+ __label__ out; \
+ int result; /* Value of the whole statement expression.  */ \
+ /* Flush stdio, write through the descriptor, then reposition the stream.  */ \
+ result = fflush (stdout); \
+ if (result == EOF) \
+ { \
+ perror ("fflush"); \
+ goto out; \
+ } \
+ result = lseek (STDOUT_FILENO, 0, SEEK_END) == (off_t) -1 ? -1 : 0; /* Keep only the status; an off_t offset does not fit in int.  */ \
+ if (result < 0 && errno == ESPIPE) /* Unseekable stdout is not an error.  */ \
+ result = 0; \
+ if (result < 0) \
+ { \
+ perror ("lseek"); \
+ goto out; \
+ } \
+ result = dprintf (STDOUT_FILENO, __VA_ARGS__); \
+ if (result < 0) \
+ { \
+ perror ("dprintf"); \
+ goto out; \
+ } \
+ result = fseek (stdout, 0, SEEK_END); /* Move the stream to the end as well.  */ \
+ if (result < 0 && errno == ESPIPE) \
+ result = 0; \
+ if (result < 0) \
+ perror ("fseek"); \
+out: \
+ result; /* Negative on failure, 0 on success.  */ \
+})

423
glibc-RHEL-119390-4.patch Normal file
View File

@ -0,0 +1,423 @@
commit 1dc5cdc3da19e10d47e50a5ea2ea3ce62ee2fa82
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted fprintf output specifiers
Wire fprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 850bc89517931652..a40b4e15e346d170 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d
+xprintf-funcs := p as d f
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-f-c.c b/stdio-common/tst-printf-format-f-c.c
new file mode 100644
index 0000000000000000..1db9e2b5f3bb2d32
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-f-char.c b/stdio-common/tst-printf-format-f-char.c
new file mode 100644
index 0000000000000000..a492f318620bc82e
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-f-double.c b/stdio-common/tst-printf-format-f-double.c
new file mode 100644
index 0000000000000000..906ef0b90b9e7d1d
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-f-int.c b/stdio-common/tst-printf-format-f-int.c
new file mode 100644
index 0000000000000000..92dc0c919771ae26
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-f-ldouble.c b/stdio-common/tst-printf-format-f-ldouble.c
new file mode 100644
index 0000000000000000..0a0c88d64c873061
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-f-llong.c b/stdio-common/tst-printf-format-f-llong.c
new file mode 100644
index 0000000000000000..ceb8d035faaf0a12
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-f-long.c b/stdio-common/tst-printf-format-f-long.c
new file mode 100644
index 0000000000000000..a4a5dca5ebd7b3a4
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-f-s.c b/stdio-common/tst-printf-format-f-s.c
new file mode 100644
index 0000000000000000..da0e0f8bacab6992
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-f-short.c b/stdio-common/tst-printf-format-f-short.c
new file mode 100644
index 0000000000000000..3abd134d0aeeef87
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-f-uchar.c b/stdio-common/tst-printf-format-f-uchar.c
new file mode 100644
index 0000000000000000..f104cde37b322b60
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-f-uint.c b/stdio-common/tst-printf-format-f-uint.c
new file mode 100644
index 0000000000000000..0e1fdb4b367032a2
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-f-ullong.c b/stdio-common/tst-printf-format-f-ullong.c
new file mode 100644
index 0000000000000000..b4669fcbb7dd3282
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-f-ulong.c b/stdio-common/tst-printf-format-f-ulong.c
new file mode 100644
index 0000000000000000..3f4f900362a153b5
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-f-ushort.c b/stdio-common/tst-printf-format-f-ushort.c
new file mode 100644
index 0000000000000000..d49c2371403e20a6
--- /dev/null
+++ b/stdio-common/tst-printf-format-f-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'fprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-f.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-f.h b/stdio-common/tst-printf-format-f.h
new file mode 100644
index 0000000000000000..efb3283a032f44a7
--- /dev/null
+++ b/stdio-common/tst-printf-format-f.h
@@ -0,0 +1,29 @@
+/* Test feature wrapper for formatted 'fprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+#define printf_under_test(...) /* Format straight to stdout via fprintf.  */ \
+({ \
+ int result; \
+ \
+ result = fprintf (stdout, __VA_ARGS__); \
+ if (result < 0) /* Report the failure; the value is still yielded.  */ \
+ perror ("fprintf"); \
+ result; /* Value of the statement expression: fprintf's return.  */ \
+})

454
glibc-RHEL-119390-5.patch Normal file
View File

@ -0,0 +1,454 @@
commit c683ac8520e8064e7be3a22922d80849271290ac
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted sprintf output specifiers
Wire sprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index a40b4e15e346d170..f4e4a8554319f16e 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f
+xprintf-funcs := p as d f s
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-s-c.c b/stdio-common/tst-printf-format-s-c.c
new file mode 100644
index 0000000000000000..87dad077b9c05216
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-s-char.c b/stdio-common/tst-printf-format-s-char.c
new file mode 100644
index 0000000000000000..f67ac94a5dd70cf4
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-s-double.c b/stdio-common/tst-printf-format-s-double.c
new file mode 100644
index 0000000000000000..16186b5dc55eabaf
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-s-int.c b/stdio-common/tst-printf-format-s-int.c
new file mode 100644
index 0000000000000000..2ed7b2df471cd96d
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-s-ldouble.c b/stdio-common/tst-printf-format-s-ldouble.c
new file mode 100644
index 0000000000000000..0362cc50f78ffdfa
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-s-llong.c b/stdio-common/tst-printf-format-s-llong.c
new file mode 100644
index 0000000000000000..b49f84998a7e7ca2
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-s-long.c b/stdio-common/tst-printf-format-s-long.c
new file mode 100644
index 0000000000000000..49224d7f29d7e65c
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-s-s.c b/stdio-common/tst-printf-format-s-s.c
new file mode 100644
index 0000000000000000..3a400f8907895db0
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-s-short.c b/stdio-common/tst-printf-format-s-short.c
new file mode 100644
index 0000000000000000..c98a808cff51c52a
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-s-uchar.c b/stdio-common/tst-printf-format-s-uchar.c
new file mode 100644
index 0000000000000000..befc36894975a9a1
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-s-uint.c b/stdio-common/tst-printf-format-s-uint.c
new file mode 100644
index 0000000000000000..f3a4c49632a3be07
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-s-ullong.c b/stdio-common/tst-printf-format-s-ullong.c
new file mode 100644
index 0000000000000000..4ce559037921e01f
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-s-ulong.c b/stdio-common/tst-printf-format-s-ulong.c
new file mode 100644
index 0000000000000000..81f9eea893c194d2
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-s-ushort.c b/stdio-common/tst-printf-format-s-ushort.c
new file mode 100644
index 0000000000000000..4b1cca6e6cf6b342
--- /dev/null
+++ b/stdio-common/tst-printf-format-s-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'sprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-s.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-s.h b/stdio-common/tst-printf-format-s.h
new file mode 100644
index 0000000000000000..7d1e72cffb7b20fb
--- /dev/null
+++ b/stdio-common/tst-printf-format-s.h
@@ -0,0 +1,60 @@
+/* Test feature wrapper for formatted 'sprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/next_to_fault.h>
+
+#define SPRINTF_BUFFER_SIZE 65536 /* Size requested from support_next_to_fault_allocate.  */
+
+static struct support_next_to_fault ntf; /* Its buffer member receives the sprintf output.  */
+
+#define PREPARE printf_under_test_init /* Presumably the test driver's pre-test hook -- confirm.  */
+static void
+printf_under_test_init (int argc, char **argv) /* ARGC and ARGV are not used.  */
+{
+ ntf = support_next_to_fault_allocate (SPRINTF_BUFFER_SIZE); /* Buffer later used by printf_under_test.  */
+}
+
+static void __attribute__ ((destructor)) /* Destructor: invoked automatically at program exit.  */
+printf_under_test_fini (void)
+{
+ support_next_to_fault_free (&ntf); /* Release the allocation recorded in ntf.  */
+}
+
+#define printf_under_test(...) /* sprintf into ntf.buffer, then copy to stdout.  */ \
+({ \
+ __label__ out; /* Local label, scoped to this statement expression.  */ \
+ char *str = ntf.buffer; \
+ int result; \
+ \
+ result = sprintf (str, __VA_ARGS__); \
+ if (result < 0) /* Formatting failed: report it and skip the write.  */ \
+ { \
+ perror ("sprintf"); \
+ goto out; \
+ } \
+ if (fwrite (str, sizeof (*str), result, stdout) != result) /* Short write.  */ \
+ { \
+ perror ("fwrite"); \
+ result = -1; /* A short write is reported as failure.  */ \
+ } \
+out: \
+ result; /* sprintf's return value, or -1 if fwrite came up short.  */ \
+})

454
glibc-RHEL-119390-6.patch Normal file
View File

@ -0,0 +1,454 @@
commit 0b6379cb98590c28088f017ddcc0edb8ad7d0131
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted snprintf output specifiers
Wire snprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index f4e4a8554319f16e..80833d42c24ea899 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s
+xprintf-funcs := p as d f s sn
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-sn-c.c b/stdio-common/tst-printf-format-sn-c.c
new file mode 100644
index 0000000000000000..59f51d635bc46d28
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-sn-char.c b/stdio-common/tst-printf-format-sn-char.c
new file mode 100644
index 0000000000000000..8b682dd2d52f04d2
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-sn-double.c b/stdio-common/tst-printf-format-sn-double.c
new file mode 100644
index 0000000000000000..4719a58d84bda809
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-sn-int.c b/stdio-common/tst-printf-format-sn-int.c
new file mode 100644
index 0000000000000000..94c42f246fc8ce49
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-sn-ldouble.c b/stdio-common/tst-printf-format-sn-ldouble.c
new file mode 100644
index 0000000000000000..921f3ffe3b79a05d
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-sn-llong.c b/stdio-common/tst-printf-format-sn-llong.c
new file mode 100644
index 0000000000000000..013552791e087d0c
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-sn-long.c b/stdio-common/tst-printf-format-sn-long.c
new file mode 100644
index 0000000000000000..58c8912746c1108b
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-sn-s.c b/stdio-common/tst-printf-format-sn-s.c
new file mode 100644
index 0000000000000000..aa3f170c14790926
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-sn-short.c b/stdio-common/tst-printf-format-sn-short.c
new file mode 100644
index 0000000000000000..f7baa1211d16e203
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-sn-uchar.c b/stdio-common/tst-printf-format-sn-uchar.c
new file mode 100644
index 0000000000000000..6ae5f121bd76ab9d
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-sn-uint.c b/stdio-common/tst-printf-format-sn-uint.c
new file mode 100644
index 0000000000000000..f0a0c3063f89781a
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-sn-ullong.c b/stdio-common/tst-printf-format-sn-ullong.c
new file mode 100644
index 0000000000000000..0dc0a50c4f2362a2
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-sn-ulong.c b/stdio-common/tst-printf-format-sn-ulong.c
new file mode 100644
index 0000000000000000..23ff5a27d7c25c34
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-sn-ushort.c b/stdio-common/tst-printf-format-sn-ushort.c
new file mode 100644
index 0000000000000000..1c5cffbeb414048e
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'snprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-sn.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-sn.h b/stdio-common/tst-printf-format-sn.h
new file mode 100644
index 0000000000000000..ec2645bf57da8bbb
--- /dev/null
+++ b/stdio-common/tst-printf-format-sn.h
@@ -0,0 +1,60 @@
+/* Test feature wrapper for formatted 'snprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <support/next_to_fault.h>
+
+#define SPRINTF_BUFFER_SIZE 65536
+
+static struct support_next_to_fault ntf; /* Target buffer for snprintf.  */
+
+#define PREPARE printf_under_test_init /* Test driver hook?  TODO confirm.  */
+static void
+printf_under_test_init (int argc, char **argv)
+{ /* Allocate the buffer snprintf writes to; argc and argv are unused.  */
+ ntf = support_next_to_fault_allocate (SPRINTF_BUFFER_SIZE);
+}
+
+static void __attribute__ ((destructor))
+printf_under_test_fini (void)
+{ /* Registered as a destructor; releases what the init hook allocated.  */
+ support_next_to_fault_free (&ntf);
+}
+
+#define printf_under_test(...) \
+({ /* Format with snprintf into ntf.buffer, then copy that to stdout.  */ \
+ __label__ out; /* Local label, so every expansion has its own 'out'.  */ \
+ char *str = ntf.buffer; \
+ int result; /* Becomes the value of the statement expression.  */ \
+ /* NOTE(review): if truncated, result >= ntf.length at fwrite; confirm.  */ \
+ result = snprintf (str, ntf.length, __VA_ARGS__); \
+ if (result < 0) \
+ { /* Formatting failed: skip the copy and yield snprintf's result.  */ \
+ perror ("snprintf"); \
+ goto out; \
+ } \
+ if (fwrite (str, sizeof (*str), result, stdout) != result) \
+ { /* Short write: the expression yields -1 instead of the count.  */ \
+ perror ("fwrite"); \
+ result = -1; \
+ } \
+out: \
+ result; \
+})

428
glibc-RHEL-119390-7.patch Normal file
View File

@ -0,0 +1,428 @@
commit bad554d9b4f10988eb7fdb814fbaa5e89416d781
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vprintf output specifiers
Wire vprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 80833d42c24ea899..bdf99f9ce043566d 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn
+xprintf-funcs := p as d f s sn v
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-v-c.c b/stdio-common/tst-printf-format-v-c.c
new file mode 100644
index 0000000000000000..94aa3042aaee6d97
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-v-char.c b/stdio-common/tst-printf-format-v-char.c
new file mode 100644
index 0000000000000000..c813d81e53956295
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-v-double.c b/stdio-common/tst-printf-format-v-double.c
new file mode 100644
index 0000000000000000..90cc1704eb3da2f3
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-v-int.c b/stdio-common/tst-printf-format-v-int.c
new file mode 100644
index 0000000000000000..6529425b263975c8
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-v-ldouble.c b/stdio-common/tst-printf-format-v-ldouble.c
new file mode 100644
index 0000000000000000..813f4a510dc833cc
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-v-llong.c b/stdio-common/tst-printf-format-v-llong.c
new file mode 100644
index 0000000000000000..270ad08bf5e948fd
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-v-long.c b/stdio-common/tst-printf-format-v-long.c
new file mode 100644
index 0000000000000000..2f5f653fecb25040
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-v-s.c b/stdio-common/tst-printf-format-v-s.c
new file mode 100644
index 0000000000000000..ebc253b3e83291f2
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-v-short.c b/stdio-common/tst-printf-format-v-short.c
new file mode 100644
index 0000000000000000..92a59d9fd3972f1e
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-v-uchar.c b/stdio-common/tst-printf-format-v-uchar.c
new file mode 100644
index 0000000000000000..045ffd2864f8158f
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-v-uint.c b/stdio-common/tst-printf-format-v-uint.c
new file mode 100644
index 0000000000000000..17b1ce3aa7780209
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-v-ullong.c b/stdio-common/tst-printf-format-v-ullong.c
new file mode 100644
index 0000000000000000..590b04f339e01cf6
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-v-ulong.c b/stdio-common/tst-printf-format-v-ulong.c
new file mode 100644
index 0000000000000000..6747677a42b38e37
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-v-ushort.c b/stdio-common/tst-printf-format-v-ushort.c
new file mode 100644
index 0000000000000000..1e782715627f7b77
--- /dev/null
+++ b/stdio-common/tst-printf-format-v-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-v.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-v.h b/stdio-common/tst-printf-format-v.h
new file mode 100644
index 0000000000000000..711b290b59df8781
--- /dev/null
+++ b/stdio-common/tst-printf-format-v.h
@@ -0,0 +1,34 @@
+/* Test feature wrapper for formatted 'vprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static int /* Returns the value 'vprintf' returned.  */
+printf_under_test (const char *restrict fmt, ...)
+{
+ va_list ap;
+ int result;
+
+ va_start (ap, fmt);
+ result = vprintf (fmt, ap); /* Format FMT with the caller's arguments.  */
+ va_end (ap);
+ if (result < 0)
+ perror ("vprintf"); /* Report the failure; RESULT is returned as is.  */
+ return result;
+}

454
glibc-RHEL-119390-8.patch Normal file
View File

@ -0,0 +1,454 @@
commit 349670f8093d920d4d683472c88029f6901f7ae7
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vasprintf output specifiers
Wire vasprintf into test infrastructure for formatted printf output
specifiers.
Owing to mtrace logging these tests take amounts of time to complete
similar to those of corresponding asprintf tests, so set timeouts for
the tests accordingly, with a global default for all the vasprintf
tests, and then individual higher settings for double and long double
tests each.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index bdf99f9ce043566d..40327c698da75a0a 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn v
+xprintf-funcs := p as d f s sn v vas
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-vas-c.c b/stdio-common/tst-printf-format-vas-c.c
new file mode 100644
index 0000000000000000..f8cf814c8c3bc293
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-vas-char.c b/stdio-common/tst-printf-format-vas-char.c
new file mode 100644
index 0000000000000000..39c6e73977f3f32f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-vas-double.c b/stdio-common/tst-printf-format-vas-double.c
new file mode 100644
index 0000000000000000..25a21bb0adc2726f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-double.c
@@ -0,0 +1,22 @@
+/* Test for formatted 'vasprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define TIMEOUT (DEFAULT_TIMEOUT * 32) /* Replaces the header's default.  */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-vas-int.c b/stdio-common/tst-printf-format-vas-int.c
new file mode 100644
index 0000000000000000..9cd70c8fcbae1c03
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-vas-ldouble.c b/stdio-common/tst-printf-format-vas-ldouble.c
new file mode 100644
index 0000000000000000..60c3933fab1b9216
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-ldouble.c
@@ -0,0 +1,22 @@
+/* Test for formatted 'vasprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define TIMEOUT (DEFAULT_TIMEOUT * 128) /* Replaces the header's default.  */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-vas-llong.c b/stdio-common/tst-printf-format-vas-llong.c
new file mode 100644
index 0000000000000000..5d5322b8b9dfe0a4
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-vas-long.c b/stdio-common/tst-printf-format-vas-long.c
new file mode 100644
index 0000000000000000..d9651053f8f14dcd
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-vas-s.c b/stdio-common/tst-printf-format-vas-s.c
new file mode 100644
index 0000000000000000..6d74ab83e3038b44
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-vas-short.c b/stdio-common/tst-printf-format-vas-short.c
new file mode 100644
index 0000000000000000..a6d76a97055fbb89
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-vas-uchar.c b/stdio-common/tst-printf-format-vas-uchar.c
new file mode 100644
index 0000000000000000..c3dee11b7d8092d9
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-vas-uint.c b/stdio-common/tst-printf-format-vas-uint.c
new file mode 100644
index 0000000000000000..e56e89374db69c3f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-vas-ullong.c b/stdio-common/tst-printf-format-vas-ullong.c
new file mode 100644
index 0000000000000000..05691bc3181c6d18
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-vas-ulong.c b/stdio-common/tst-printf-format-vas-ulong.c
new file mode 100644
index 0000000000000000..767d9cb6a6ac64a2
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-vas-ushort.c b/stdio-common/tst-printf-format-vas-ushort.c
new file mode 100644
index 0000000000000000..284d79f75364504f
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vasprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vas.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-vas.h b/stdio-common/tst-printf-format-vas.h
new file mode 100644
index 0000000000000000..3e38e729ec97ceed
--- /dev/null
+++ b/stdio-common/tst-printf-format-vas.h
@@ -0,0 +1,50 @@
+/* Test feature wrapper for formatted 'vasprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static int /* Returns the 'vasprintf' result, or -1 if 'fwrite' fell short.  */
+printf_under_test (const char *restrict fmt, ...)
+{
+ va_list ap;
+ int result;
+ char *str; /* Set by 'vasprintf'; released with 'free' below.  */
+
+ va_start (ap, fmt);
+ result = vasprintf (&str, fmt, ap);
+ va_end (ap);
+ if (result < 0)
+ {
+ perror ("vasprintf");
+ goto out; /* Jumps past 'free (str)' on this path.  */
+ }
+ if (fwrite (str, sizeof (*str), result, stdout) != result)
+ { /* Fewer than RESULT items were written to stdout.  */
+ perror ("fwrite");
+ result = -1;
+ }
+ free (str);
+out:
+ return result;
+}
+
+#ifndef TIMEOUT /* A test may define TIMEOUT before including this file.  */
+# define TIMEOUT (DEFAULT_TIMEOUT * 12)
+#endif

456
glibc-RHEL-119390-9.patch Normal file
View File

@ -0,0 +1,456 @@
commit fae4eacae75e4f2767998aca703d6efaae2a747f
Author: Maciej W. Rozycki <macro@redhat.com>
Date: Thu Nov 7 06:14:24 2024 +0000
stdio-common: Add tests for formatted vdprintf output specifiers
Wire vdprintf into test infrastructure for formatted printf output
specifiers.
Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 40327c698da75a0a..01018c53b0f4a13f 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -24,7 +24,7 @@ subdir := stdio-common
include ../Makeconfig
# List of markers for printf family function tests.
-xprintf-funcs := p as d f s sn v vas
+xprintf-funcs := p as d f s sn v vas vd
# List of data types and formats for individual per-conversion printf tests.
fmt-convs := double ldouble
diff --git a/stdio-common/tst-printf-format-vd-c.c b/stdio-common/tst-printf-format-vd-c.c
new file mode 100644
index 0000000000000000..209b1784896a0bfd
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-c.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for the 'c' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-c.c"
diff --git a/stdio-common/tst-printf-format-vd-char.c b/stdio-common/tst-printf-format-vd-char.c
new file mode 100644
index 0000000000000000..8286b6d5b227e83a
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-char.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for signed char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-char.c"
diff --git a/stdio-common/tst-printf-format-vd-double.c b/stdio-common/tst-printf-format-vd-double.c
new file mode 100644
index 0000000000000000..e89a2ca5983697ce
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-double.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-double.c"
diff --git a/stdio-common/tst-printf-format-vd-int.c b/stdio-common/tst-printf-format-vd-int.c
new file mode 100644
index 0000000000000000..598a888b1c937361
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-int.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-int.c"
diff --git a/stdio-common/tst-printf-format-vd-ldouble.c b/stdio-common/tst-printf-format-vd-ldouble.c
new file mode 100644
index 0000000000000000..d3ada6ff0bcdd9c1
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-ldouble.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for long double conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-ldouble.c"
diff --git a/stdio-common/tst-printf-format-vd-llong.c b/stdio-common/tst-printf-format-vd-llong.c
new file mode 100644
index 0000000000000000..ea6ea7b2157dc0c9
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-llong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-llong.c"
diff --git a/stdio-common/tst-printf-format-vd-long.c b/stdio-common/tst-printf-format-vd-long.c
new file mode 100644
index 0000000000000000..4ee1cdacffb4fe77
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-long.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-long.c"
diff --git a/stdio-common/tst-printf-format-vd-s.c b/stdio-common/tst-printf-format-vd-s.c
new file mode 100644
index 0000000000000000..df7cf9a6fbf04c9b
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-s.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for the 's' conversion.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-s.c"
diff --git a/stdio-common/tst-printf-format-vd-short.c b/stdio-common/tst-printf-format-vd-short.c
new file mode 100644
index 0000000000000000..87128c8303b57cd8
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-short.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-short.c"
diff --git a/stdio-common/tst-printf-format-vd-uchar.c b/stdio-common/tst-printf-format-vd-uchar.c
new file mode 100644
index 0000000000000000..90dea719471dcb30
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-uchar.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for unsigned char conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-uchar.c"
diff --git a/stdio-common/tst-printf-format-vd-uint.c b/stdio-common/tst-printf-format-vd-uint.c
new file mode 100644
index 0000000000000000..feb95dc018ee20fe
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-uint.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for unsigned int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-uint.c"
diff --git a/stdio-common/tst-printf-format-vd-ullong.c b/stdio-common/tst-printf-format-vd-ullong.c
new file mode 100644
index 0000000000000000..8f62fb0aeeaca3a3
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-ullong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for unsigned long long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-ullong.c"
diff --git a/stdio-common/tst-printf-format-vd-ulong.c b/stdio-common/tst-printf-format-vd-ulong.c
new file mode 100644
index 0000000000000000..59b2015cd33aaede
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-ulong.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for unsigned long int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-ulong.c"
diff --git a/stdio-common/tst-printf-format-vd-ushort.c b/stdio-common/tst-printf-format-vd-ushort.c
new file mode 100644
index 0000000000000000..5d096502d9f44959
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd-ushort.c
@@ -0,0 +1,20 @@
+/* Test for formatted 'vdprintf' output for unsigned short int conversions.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include "tst-printf-format-vd.h"
+#include "tst-printf-format-skeleton-ushort.c"
diff --git a/stdio-common/tst-printf-format-vd.h b/stdio-common/tst-printf-format-vd.h
new file mode 100644
index 0000000000000000..d721edadc6f7786b
--- /dev/null
+++ b/stdio-common/tst-printf-format-vd.h
@@ -0,0 +1,62 @@
+/* Test feature wrapper for formatted 'vdprintf' output.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/* Format FMT and its arguments with 'vdprintf' directly on STDOUT_FILENO.  We need to go through the POSIX-mandated dance to switch between
+ handles on an open file description.  Returns 0 on success (the final 'fseek' result, not a character count), otherwise the failing call's result.  */
+
+static int
+printf_under_test (const char *restrict fmt, ...)
+{
+ va_list ap;
+ int result;
+
+ result = fflush (stdout); /* Push out anything still buffered on the stdout stream first.  */
+ if (result == EOF)
+ {
+ perror ("fflush");
+ goto out;
+ }
+ result = lseek (STDOUT_FILENO, 0, SEEK_END); /* Move the descriptor's offset to the end of the file.  */
+ if (result < 0 && errno == ESPIPE) /* ESPIPE (output that cannot seek) is not treated as an error.  */
+ result = 0;
+ if (result < 0)
+ {
+ perror ("lseek");
+ goto out;
+ }
+ va_start (ap, fmt);
+ result = vdprintf (STDOUT_FILENO, fmt, ap); /* The call under test; it writes to the descriptor, not the stream.  */
+ va_end (ap);
+ if (result < 0)
+ {
+ perror ("vdprintf");
+ goto out;
+ }
+ result = fseek (stdout, 0, SEEK_END); /* Reposition the stream at the end of the file after the descriptor write.  */
+ if (result < 0 && errno == ESPIPE) /* Same ESPIPE tolerance as for the lseek call above.  */
+ result = 0;
+ if (result < 0)
+ perror ("fseek");
+out:
+ return result; /* On success this is 0: the vdprintf count was overwritten by the fseek result.  */
+}

84
glibc-RHEL-119392-1.patch Normal file
View File

@ -0,0 +1,84 @@
commit f745d78e2628cd5b13ca119ae0c0e21d08ad1906
Author: Joseph Myers <josmyers@redhat.com>
Date: Fri Nov 8 01:53:48 2024 +0000
Avoid uninitialized result in sem_open when file does not exist
A static analyzer apparently reported an uninitialized use of the
variable result in sem_open in the case where the file is required to
exist but does not exist.
The report appears to be correct; set result to SEM_FAILED in that
case, and add a test for it.
Note: the test passes for me even without the sem_open fix, I guess
because result happens to get value SEM_FAILED (i.e. 0) when
uninitialized.
Tested for x86_64.
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index 0d9e232acec2ed39..449478a847ad2292 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -256,6 +256,7 @@ tests += \
tst-sem14 \
tst-sem15 \
tst-sem16 \
+ tst-sem17 \
tst-setuid3 \
tst-signal1 \
tst-signal2 \
diff --git a/sysdeps/pthread/sem_open.c b/sysdeps/pthread/sem_open.c
index e41236157a5d1b0a..dab734191a8ca208 100644
--- a/sysdeps/pthread/sem_open.c
+++ b/sysdeps/pthread/sem_open.c
@@ -76,6 +76,7 @@ __sem_open (const char *name, int oflag, ...)
goto try_create;
/* Return. errno is already set. */
+ result = SEM_FAILED;
}
else
/* Check whether we already have this semaphore mapped and
diff --git a/sysdeps/pthread/tst-sem17.c b/sysdeps/pthread/tst-sem17.c
new file mode 100644
index 0000000000000000..c3f05d196f4ef17a
--- /dev/null
+++ b/sysdeps/pthread/tst-sem17.c
@@ -0,0 +1,35 @@
+/* Test sem_open with missing file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <semaphore.h>
+
+#include <support/check.h>
+
+int
+do_test (void)
+{
+ sem_unlink ("/glibc-tst-sem17");
+ errno = 0;
+ sem_t *s = sem_open ("/glibc-tst-sem17", 0);
+ TEST_VERIFY (s == SEM_FAILED);
+ TEST_COMPARE (errno, ENOENT);
+ return 0;
+}
+
+#include <support/test-driver.c>

42
glibc-RHEL-119392-2.patch Normal file
View File

@ -0,0 +1,42 @@
commit c7dcf594f4c52fa7e2cc76918c8aa9abb98e9625
Author: Joseph Myers <josmyers@redhat.com>
Date: Fri Nov 8 17:08:09 2024 +0000
Rename new tst-sem17 test to tst-sem18
As noted by Adhemerval, we already have a tst-sem17 in nptl.
Tested for x86_64.
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index 449478a847ad2292..aef323296d7926f6 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -256,7 +256,7 @@ tests += \
tst-sem14 \
tst-sem15 \
tst-sem16 \
- tst-sem17 \
+ tst-sem18 \
tst-setuid3 \
tst-signal1 \
tst-signal2 \
diff --git a/sysdeps/pthread/tst-sem17.c b/sysdeps/pthread/tst-sem18.c
similarity index 92%
rename from sysdeps/pthread/tst-sem17.c
rename to sysdeps/pthread/tst-sem18.c
index c3f05d196f4ef17a..1be207bcbeeb56f1 100644
--- a/sysdeps/pthread/tst-sem17.c
+++ b/sysdeps/pthread/tst-sem18.c
@@ -24,9 +24,9 @@
int
do_test (void)
{
- sem_unlink ("/glibc-tst-sem17");
+ sem_unlink ("/glibc-tst-sem18");
errno = 0;
- sem_t *s = sem_open ("/glibc-tst-sem17", 0);
+ sem_t *s = sem_open ("/glibc-tst-sem18", 0);
TEST_VERIFY (s == SEM_FAILED);
TEST_COMPARE (errno, ENOENT);
return 0;

58
glibc-RHEL-119398.patch Normal file
View File

@ -0,0 +1,58 @@
commit 6c915c73d08028987232f6dc718f218c61113240
Author: Aurelien Jarno <aurelien@aurel32.net>
Date: Sun Nov 10 10:50:34 2024 +0100
elf: handle addition overflow in _dl_find_object_update_1 [BZ #32245]
The remaining_to_add variable can be 0 if (current_used + count) wraps.
This is caught by GCC 14+ on hppa, which determines from there that
target_seg could be NULL when remaining_to_add is zero, which in
turn causes a -Wstringop-overflow warning:
In file included from ../include/atomic.h:49,
from dl-find_object.c:20:
In function '_dlfo_update_init_seg',
inlined from '_dl_find_object_update_1' at dl-find_object.c:689:30,
inlined from '_dl_find_object_update' at dl-find_object.c:805:13:
../sysdeps/unix/sysv/linux/hppa/atomic-machine.h:44:4: error: '__atomic_store_4' writing 4 bytes into a region of size 0 overflows the destination [-Werror=stringop-overflow=]
44 | __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
dl-find_object.c:644:3: note: in expansion of macro 'atomic_store_relaxed'
644 | atomic_store_relaxed (&seg->size, new_seg_size);
| ^~~~~~~~~~~~~~~~~~~~
In function '_dl_find_object_update':
cc1: note: destination object is likely at address zero
In practice, this is not possible as it represents counts of link maps.
Link maps have sizes larger than 1 byte, so the sum of any two link map
counts will always fit within a size_t without wrapping around.
This patch therefore adds a check on remaining_to_add == 0 and tells GCC
that this cannot happen using __builtin_unreachable.
Thanks to Andreas Schwab for the investigation.
Closes: BZ #32245
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Tested-by: John David Anglin <dave.anglin@bell.net>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c
index 449302eda3..ae18b438d3 100644
--- a/elf/dl-find_object.c
+++ b/elf/dl-find_object.c
@@ -682,6 +682,14 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
= _dlfo_loaded_mappings[!active_idx];
size_t remaining_to_add = current_used + count;
+ /* remaining_to_add can be 0 if (current_used + count) wraps, but in practice
+ this is not possible as it represents counts of link maps. Link maps have
+ sizes larger than 1 byte, so the sum of any two link map counts will
+ always fit within a size_t without wrapping around. This check ensures
+ that target_seg is not erroneously considered potentially NULL by GCC. */
+ if (remaining_to_add == 0)
+ __builtin_unreachable ();
+
/* Ensure that the new segment chain has enough space. */
{
size_t new_allocated

84
glibc-RHEL-119400-1.patch Normal file
View File

@ -0,0 +1,84 @@
commit 03b8d764109be48a53b18abd4b5050e8cdc2c6da
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Thu Nov 21 17:13:33 2024 -0500
nptl: Add smoke test for pthread_getcpuclockid failure
Exercise the case where an exited thread will cause
pthread_getcpuclockid to fail.
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Conflicts:
nptl/Makefile
(fixup context)
diff --git a/nptl/Makefile b/nptl/Makefile
index 34c80f6f38261669..7139f76827b5ffe6 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -318,6 +318,7 @@ tests = \
tst-pthread-defaultattr-free \
tst-pthread-gdb-attach \
tst-pthread-gdb-attach-static \
+ tst-pthread-getcpuclockid-invalid \
tst-pthread-timedlock-lockloop \
tst-pthread_exit-nothreads \
tst-pthread_exit-nothreads-static \
diff --git a/nptl/tst-pthread-getcpuclockid-invalid.c b/nptl/tst-pthread-getcpuclockid-invalid.c
new file mode 100644
index 0000000000000000..e88a56342767a83e
--- /dev/null
+++ b/nptl/tst-pthread-getcpuclockid-invalid.c
@@ -0,0 +1,50 @@
+/* Smoke test to verify that pthread_getcpuclockid fails with ESRCH when the
+ thread in question has exited.
+ Copyright the GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <time.h>
+
+#include <support/check.h>
+#include <support/test-driver.h>
+#include <support/xthread.h>
+
+void *
+thr (void *in)
+{
+ return in; /* Thread start routine: does no work and returns its argument unchanged.  */
+}
+
+int
+do_test (void)
+{
+ clockid_t c;
+ pthread_t t = xpthread_create (NULL, thr, NULL); /* T is neither joined nor detached in this test.  */
+
+ int ret = 0;
+ while ((ret = pthread_getcpuclockid (t, &c)) == 0) /* Poll until the call stops succeeding.  */
+ sched_yield (); /* Yield the CPU between polls so other threads can run.  */
+
+ TEST_COMPARE (ret, ESRCH); /* The first failure seen must be ESRCH.  */
+
+ return 0;
+}
+
+#include <support/test-driver.c>

51
glibc-RHEL-119400-2.patch Normal file
View File

@ -0,0 +1,51 @@
commit 19a198f05802fcc05441c364ed75311ef3f6d663
Author: Siddhesh Poyarekar <siddhesh@sourceware.org>
Date: Thu Nov 28 06:30:40 2024 -0500
pthread_getcpuclockid: Add descriptive comment to smoke test
Add a descriptive comment to the tst-pthread-getcpuclockid-invalid test
and also drop pthread_getcpuclockid from the TODO-testing list since it
now has full coverage.
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff --git a/nptl/TODO-testing b/nptl/TODO-testing
index e076e5624f1cfbaa..f50d2ceb51b247c3 100644
--- a/nptl/TODO-testing
+++ b/nptl/TODO-testing
@@ -10,10 +10,6 @@ pthread_attr_[sg]etstack
some more tests needed
-pthread_getcpuclockid
-
- check that value is reset -> rt subdir
-
pthread_getschedparam
pthread_setschedparam
diff --git a/nptl/tst-pthread-getcpuclockid-invalid.c b/nptl/tst-pthread-getcpuclockid-invalid.c
index e88a56342767a83e..7ac46acad8fe0fd7 100644
--- a/nptl/tst-pthread-getcpuclockid-invalid.c
+++ b/nptl/tst-pthread-getcpuclockid-invalid.c
@@ -1,5 +1,4 @@
-/* Smoke test to verify that pthread_getcpuclockid fails with ESRCH when the
- thread in question has exited.
+/* pthread_getcpuclockid should fail with ESRCH when the thread exits.
Copyright the GNU Toolchain Authors.
This file is part of the GNU C Library.
@@ -17,6 +16,12 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+/* The input thread descriptor to pthread_getcpuclockid needs to be valid when
+ the function is called. For the purposes of this test, this means that the
+ thread must not be detached and must have exited without yet being joined. This
+ should be good enough to complete coverage for pthread_getcpuclockid alongside
+ tst-clock2. */
+
#include <errno.h>
#include <pthread.h>
#include <sched.h>

1531
glibc-RHEL-119402.patch Normal file

File diff suppressed because it is too large Load Diff

106
glibc-RHEL-119403.patch Normal file
View File

@ -0,0 +1,106 @@
commit 3c2b9dc41cd05da055fae6f793a355063156bdf3
Author: Joseph Myers <josmyers@redhat.com>
Date: Fri Nov 29 20:25:04 2024 +0000
Add threaded test of sem_trywait
All the existing glibc tests of sem_trywait are single-threaded. Add
one that calls sem_trywait and sem_post in separate threads.
Tested for x86_64.
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index aef323296d7926f6..93a8534cbf814f27 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -257,6 +257,7 @@ tests += \
tst-sem15 \
tst-sem16 \
tst-sem18 \
+ tst-sem19 \
tst-setuid3 \
tst-signal1 \
tst-signal2 \
diff --git a/sysdeps/pthread/tst-sem19.c b/sysdeps/pthread/tst-sem19.c
new file mode 100644
index 0000000000000000..9ef461e008ab9eab
--- /dev/null
+++ b/sysdeps/pthread/tst-sem19.c
@@ -0,0 +1,77 @@
+/* Test sem_trywait with threads.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <support/check.h>
+#include <support/xthread.h>
+
+/* The test uses two threads, the main thread and a newly created
+ thread to test the operation of sem_trywait in a threaded scenario.
+ The intent is to test sem_trywait when it would return EAGAIN, and
+ then again after the critical section in the new thread has posted
+ to the semaphore and the main thread succeeds in calling
+ sem_trywait. It is possible this test fails with a timeout if the
+ second thread takes longer than the test timeout to acquire the
+ lock, and post. */
+
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+static sem_t sem;
+
+static void *
+tf (void *arg)
+{
+ xpthread_mutex_lock (&lock); /* Waits here while do_test still holds LOCK.  */
+ sem_post (&sem); /* Increment SEM so that a later sem_trywait can succeed.  */
+ xpthread_mutex_unlock (&lock);
+ return NULL; /* ARG is unused and no result is passed back.  */
+}
+
+static int
+do_test (void)
+{
+ int ret;
+
+ ret = sem_init (&sem, 0, 0); /* pshared == 0, initial value == 0.  */
+ TEST_VERIFY_EXIT (ret == 0);
+ xpthread_mutex_lock (&lock); /* Take LOCK before creating the thread so tf cannot reach sem_post yet.  */
+ pthread_t th = xpthread_create (NULL, tf, NULL);
+ errno = 0;
+ /* The other thread is waiting on the lock before it calls sem_post,
+ so sem_trywait should fail. */
+ ret = sem_trywait (&sem);
+ TEST_COMPARE (ret, -1);
+ TEST_COMPARE (errno, EAGAIN);
+ xpthread_mutex_unlock (&lock);
+ /* The other thread now takes the lock, calls sem_post and releases
+ the lock. */
+ for (;;) /* Spin on sem_trywait until the post is observed; there is no explicit bound in this loop.  */
+ {
+ errno = 0;
+ ret = sem_trywait (&sem);
+ if (ret == 0) /* Semaphore acquired, so stop polling.  */
+ break;
+ TEST_COMPARE (errno, EAGAIN); /* A failed attempt is expected to report EAGAIN only.  */
+ }
+ xpthread_join (th);
+
+ return 0;
+}
+
+#include <support/test-driver.c>

Some files were not shown because too many files have changed in this diff Show More