glibc/glibc-RHEL-118273-45.patch
Yuki Inoguchi 9dd92cac18 aarch64: Add GLIBC_2.40 vector functions and performance fixes (RHEL-118273)
This combines the following upstream commits:

e45af510bc AArch64: Fix instability in AdvSIMD sinh
6c22823da5 AArch64: Fix instability in AdvSIMD tan
aebaeb2c33 AArch64: Update math-vector-fortran.h
e20ca759af AArch64: add optimised strspn/strcspn
aac077645a AArch64: Fix SVE powf routine [BZ #33299]
1e3d1ddf97 AArch64: Optimize SVE exp functions
dee22d2a81 AArch64: Optimise SVE FP64 Hyperbolics
6849c5b791 AArch64: Improve codegen SVE log1p helper
09795c5612 AArch64: Fix builderror with GCC 12.1/12.2
aa18367c11 AArch64: Improve enabling of SVE for libmvec
691edbdf77 aarch64: fix unwinding in longjmp
4352e2cc93 aarch64: Fix _dl_tlsdesc_dynamic unwind for pac-ret (BZ 32612)
cf56eb28fa AArch64: Optimize algorithm in users of SVE expf helper
ce2f26a22e AArch64: Remove PTR_ARG/SIZE_ARG defines
8f0e7fe61e Aarch64: Improve codegen in SVE asinh
c0ff447edf Aarch64: Improve codegen in SVE exp and users, and update expf_inline
f5ff34cb3c AArch64: Improve codegen for SVE erfcf
0b195651db AArch64: Improve codegen for SVE pow
95e807209b AArch64: Improve codegen for SVE powf
d3f2b71ef1 aarch64: Fix tests not compatible with targets supporting GCS
f86b4cf875 AArch64: Improve codegen in SVE expm1f and users
140b985e5a AArch64: Improve codegen in AdvSIMD asinh
91c1fadba3 AArch64: Improve codegen for SVE log1pf users
cff9648d0b AArch64: Improve codegen of AdvSIMD expf family
569cfaaf49 AArch64: Improve codegen in AdvSIMD pow
ca0c0d0f26 AArch64: Improve codegen in users of ADVSIMD log1p helper
13a7ef5999 AArch64: Improve codegen in users of ADVSIMD expm1 helper
2d82d781a5 AArch64: Remove SVE erf and erfc tables
1cf29fbc5b AArch64: Small optimisation in AdvSIMD erf and erfc
7b8c134b54 AArch64: Improve codegen in SVE expf & related routines
a15b1394b5 AArch64: Improve codegen in SVE F32 logs
5bc100bd4b AArch64: Improve codegen in users of AdvSIMD log1pf helper
7900ac490d AArch64: Improve codegen in users of ADVSIMD expm1f helper
0fed0b250f aarch64/fpu: Add vector variants of pow
75207bde68 aarch64/fpu: Add vector variants of cbrt
157f89fa3d aarch64/fpu: Add vector variants of hypot
90a6ca8b28 aarch64: Fix AdvSIMD libmvec routines for big-endian
87cb1dfcd6 aarch64/fpu: Add vector variants of erfc
3d3a4fb8e4 aarch64/fpu: Add vector variants of tanh
eedbbca0bf aarch64/fpu: Add vector variants of sinh
8b67920528 aarch64/fpu: Add vector variants of atanh
81406ea3c5 aarch64/fpu: Add vector variants of asinh
b09fee1d21 aarch64/fpu: Add vector variants of acosh
bdb5705b7b aarch64/fpu: Add vector variants of cosh
cb5d84f1f8 aarch64/fpu: Add vector variants of erf

Resolves: RHEL-118273
2025-12-05 16:24:54 +01:00

89 lines
3.2 KiB
Diff

commit e45af510bc816e860c8e2e1d4a652b4fe15c4b34
Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date: Thu Nov 6 18:29:33 2025 +0000
AArch64: Fix instability in AdvSIMD sinh
Previously presence of special-cases in one lane could affect the
results in other lanes due to unconditional scalar fallback. The old
WANT_SIMD_EXCEPT option (which has never been enabled in libmvec) has
been removed from AOR, making it easier to spot and fix
this. No measured change in performance. This patch applies cleanly as
far back as 2.41, however there are conflicts with 2.40 where sinh was
first introduced.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
index 7adf771517de2507..66504cdee84ee77e 100644
--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
@@ -24,36 +24,26 @@ static const struct data
{
struct v_expm1_data d;
uint64x2_t halff;
-#if WANT_SIMD_EXCEPT
- uint64x2_t tiny_bound, thresh;
-#else
float64x2_t large_bound;
-#endif
} data = {
.d = V_EXPM1_DATA,
.halff = V2 (0x3fe0000000000000),
-#if WANT_SIMD_EXCEPT
- /* 2^-26, below which sinh(x) rounds to x. */
- .tiny_bound = V2 (0x3e50000000000000),
- /* asuint(large_bound) - asuint(tiny_bound). */
- .thresh = V2 (0x0230000000000000),
-#else
/* 2^9. expm1 helper overflows for large input. */
.large_bound = V2 (0x1p+9),
-#endif
};
static float64x2_t NOINLINE VPCS_ATTR
-special_case (float64x2_t x)
+special_case (float64x2_t x, float64x2_t t, float64x2_t halfsign,
+ uint64x2_t special)
{
- return v_call_f64 (sinh, x, x, v_u64 (-1));
+ return v_call_f64 (sinh, x, vmulq_f64 (t, halfsign), special);
}
/* Approximation for vector double-precision sinh(x) using expm1.
sinh(x) = (exp(x) - exp(-x)) / 2.
The greatest observed error is 2.52 ULP:
- _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
- want -0x1.ac2f05bb66fc9p-2. */
+ _ZGVnN2v_sinh(0x1.9f6ff2ab6fb19p-2) got 0x1.aaed83a3153ccp-2
+ want 0x1.aaed83a3153c9p-2. */
float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
@@ -63,21 +53,16 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
float64x2_t halfsign = vreinterpretq_f64_u64 (
vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
-#if WANT_SIMD_EXCEPT
- uint64x2_t special = vcgeq_u64 (
- vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
-#else
uint64x2_t special = vcageq_f64 (x, d->large_bound);
-#endif
-
- /* Fall back to scalar variant for all lanes if any of them are special. */
- if (__glibc_unlikely (v_any_u64 (special)))
- return special_case (x);
/* Up to the point that expm1 overflows, we can use it to calculate sinh
using a slight rearrangement of the definition of sinh. This allows us to
retain acceptable accuracy for very small inputs. */
float64x2_t t = expm1_inline (ax, &d->d);
t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
+
+ if (__glibc_unlikely (v_any_u64 (special)))
+ return special_case (x, t, halfsign, special);
+
return vmulq_f64 (t, halfsign);
}