This combines the following upstream commits:

e45af510bc AArch64: Fix instability in AdvSIMD sinh
6c22823da5 AArch64: Fix instability in AdvSIMD tan
aebaeb2c33 AArch64: Update math-vector-fortran.h
e20ca759af AArch64: add optimised strspn/strcspn
aac077645a AArch64: Fix SVE powf routine [BZ #33299]
1e3d1ddf97 AArch64: Optimize SVE exp functions
dee22d2a81 AArch64: Optimise SVE FP64 Hyperbolics
6849c5b791 AArch64: Improve codegen SVE log1p helper
09795c5612 AArch64: Fix builderror with GCC 12.1/12.2
aa18367c11 AArch64: Improve enabling of SVE for libmvec
691edbdf77 aarch64: fix unwinding in longjmp
4352e2cc93 aarch64: Fix _dl_tlsdesc_dynamic unwind for pac-ret (BZ 32612)
cf56eb28fa AArch64: Optimize algorithm in users of SVE expf helper
ce2f26a22e AArch64: Remove PTR_ARG/SIZE_ARG defines
8f0e7fe61e Aarch64: Improve codegen in SVE asinh
c0ff447edf Aarch64: Improve codegen in SVE exp and users, and update expf_inline
f5ff34cb3c AArch64: Improve codegen for SVE erfcf
0b195651db AArch64: Improve codegen for SVE pow
95e807209b AArch64: Improve codegen for SVE powf
d3f2b71ef1 aarch64: Fix tests not compatible with targets supporting GCS
f86b4cf875 AArch64: Improve codegen in SVE expm1f and users
140b985e5a AArch64: Improve codegen in AdvSIMD asinh
91c1fadba3 AArch64: Improve codegen for SVE log1pf users
cff9648d0b AArch64: Improve codegen of AdvSIMD expf family
569cfaaf49 AArch64: Improve codegen in AdvSIMD pow
ca0c0d0f26 AArch64: Improve codegen in users of ADVSIMD log1p helper
13a7ef5999 AArch64: Improve codegen in users of ADVSIMD expm1 helper
2d82d781a5 AArch64: Remove SVE erf and erfc tables
1cf29fbc5b AArch64: Small optimisation in AdvSIMD erf and erfc
7b8c134b54 AArch64: Improve codegen in SVE expf & related routines
a15b1394b5 AArch64: Improve codegen in SVE F32 logs
5bc100bd4b AArch64: Improve codegen in users of AdvSIMD log1pf helper
7900ac490d AArch64: Improve codegen in users of ADVSIMD expm1f helper
0fed0b250f aarch64/fpu: Add vector variants of pow
75207bde68 aarch64/fpu: Add vector variants of cbrt
157f89fa3d aarch64/fpu: Add vector variants of hypot
90a6ca8b28 aarch64: Fix AdvSIMD libmvec routines for big-endian
87cb1dfcd6 aarch64/fpu: Add vector variants of erfc
3d3a4fb8e4 aarch64/fpu: Add vector variants of tanh
eedbbca0bf aarch64/fpu: Add vector variants of sinh
8b67920528 aarch64/fpu: Add vector variants of atanh
81406ea3c5 aarch64/fpu: Add vector variants of asinh
b09fee1d21 aarch64/fpu: Add vector variants of acosh
bdb5705b7b aarch64/fpu: Add vector variants of cosh
cb5d84f1f8 aarch64/fpu: Add vector variants of erf

Resolves: RHEL-118273
89 lines
3.2 KiB
Diff
89 lines
3.2 KiB
Diff
commit e45af510bc816e860c8e2e1d4a652b4fe15c4b34
|
|
Author: Joe Ramsay <Joe.Ramsay@arm.com>
|
|
Date: Thu Nov 6 18:29:33 2025 +0000
|
|
|
|
AArch64: Fix instability in AdvSIMD sinh
|
|
|
|
Previously presence of special-cases in one lane could affect the
|
|
results in other lanes due to unconditional scalar fallback. The old
|
|
WANT_SIMD_EXCEPT option (which has never been enabled in libmvec) has
|
|
been removed from AOR, making it easier to spot and fix
|
|
this. No measured change in performance. This patch applies cleanly as
|
|
far back as 2.41, however there are conflicts with 2.40 where sinh was
|
|
first introduced.
|
|
|
|
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
|
|
|
|
diff --git a/sysdeps/aarch64/fpu/sinh_advsimd.c b/sysdeps/aarch64/fpu/sinh_advsimd.c
|
|
index 7adf771517de2507..66504cdee84ee77e 100644
|
|
--- a/sysdeps/aarch64/fpu/sinh_advsimd.c
|
|
+++ b/sysdeps/aarch64/fpu/sinh_advsimd.c
|
|
@@ -24,36 +24,26 @@ static const struct data
|
|
{
|
|
struct v_expm1_data d;
|
|
uint64x2_t halff;
|
|
-#if WANT_SIMD_EXCEPT
|
|
- uint64x2_t tiny_bound, thresh;
|
|
-#else
|
|
float64x2_t large_bound;
|
|
-#endif
|
|
} data = {
|
|
.d = V_EXPM1_DATA,
|
|
.halff = V2 (0x3fe0000000000000),
|
|
-#if WANT_SIMD_EXCEPT
|
|
- /* 2^-26, below which sinh(x) rounds to x. */
|
|
- .tiny_bound = V2 (0x3e50000000000000),
|
|
- /* asuint(large_bound) - asuint(tiny_bound). */
|
|
- .thresh = V2 (0x0230000000000000),
|
|
-#else
|
|
/* 2^9. expm1 helper overflows for large input. */
|
|
.large_bound = V2 (0x1p+9),
|
|
-#endif
|
|
};
|
|
|
|
static float64x2_t NOINLINE VPCS_ATTR
|
|
-special_case (float64x2_t x)
|
|
+special_case (float64x2_t x, float64x2_t t, float64x2_t halfsign,
|
|
+ uint64x2_t special)
|
|
{
|
|
- return v_call_f64 (sinh, x, x, v_u64 (-1));
|
|
+ return v_call_f64 (sinh, x, vmulq_f64 (t, halfsign), special);
|
|
}
|
|
|
|
/* Approximation for vector double-precision sinh(x) using expm1.
|
|
sinh(x) = (exp(x) - exp(-x)) / 2.
|
|
The greatest observed error is 2.52 ULP:
|
|
- _ZGVnN2v_sinh(-0x1.a098a2177a2b9p-2) got -0x1.ac2f05bb66fccp-2
|
|
- want -0x1.ac2f05bb66fc9p-2. */
|
|
+ _ZGVnN2v_sinh(0x1.9f6ff2ab6fb19p-2) got 0x1.aaed83a3153ccp-2
|
|
+ want 0x1.aaed83a3153c9p-2. */
|
|
float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
|
|
{
|
|
const struct data *d = ptr_barrier (&data);
|
|
@@ -63,21 +53,16 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
|
|
float64x2_t halfsign = vreinterpretq_f64_u64 (
|
|
vbslq_u64 (v_u64 (0x8000000000000000), ix, d->halff));
|
|
|
|
-#if WANT_SIMD_EXCEPT
|
|
- uint64x2_t special = vcgeq_u64 (
|
|
- vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
|
|
-#else
|
|
uint64x2_t special = vcageq_f64 (x, d->large_bound);
|
|
-#endif
|
|
-
|
|
- /* Fall back to scalar variant for all lanes if any of them are special. */
|
|
- if (__glibc_unlikely (v_any_u64 (special)))
|
|
- return special_case (x);
|
|
|
|
/* Up to the point that expm1 overflows, we can use it to calculate sinh
|
|
using a slight rearrangement of the definition of sinh. This allows us to
|
|
retain acceptable accuracy for very small inputs. */
|
|
float64x2_t t = expm1_inline (ax, &d->d);
|
|
t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
|
|
+
|
|
+ if (__glibc_unlikely (v_any_u64 (special)))
|
|
+ return special_case (x, t, halfsign, special);
|
|
+
|
|
return vmulq_f64 (t, halfsign);
|
|
}
|