This combines the following upstream commits: e45af510bc AArch64: Fix instability in AdvSIMD sinh 6c22823da5 AArch64: Fix instability in AdvSIMD tan aebaeb2c33 AArch64: Update math-vector-fortran.h e20ca759af AArch64: add optimised strspn/strcspn aac077645a AArch64: Fix SVE powf routine [BZ #33299] 1e3d1ddf97 AArch64: Optimize SVE exp functions dee22d2a81 AArch64: Optimise SVE FP64 Hyperbolics 6849c5b791 AArch64: Improve codegen SVE log1p helper 09795c5612 AArch64: Fix builderror with GCC 12.1/12.2 aa18367c11 AArch64: Improve enabling of SVE for libmvec 691edbdf77 aarch64: fix unwinding in longjmp 4352e2cc93 aarch64: Fix _dl_tlsdesc_dynamic unwind for pac-ret (BZ 32612) cf56eb28fa AArch64: Optimize algorithm in users of SVE expf helper ce2f26a22e AArch64: Remove PTR_ARG/SIZE_ARG defines 8f0e7fe61e Aarch64: Improve codegen in SVE asinh c0ff447edf Aarch64: Improve codegen in SVE exp and users, and update expf_inline f5ff34cb3c AArch64: Improve codegen for SVE erfcf 0b195651db AArch64: Improve codegen for SVE pow 95e807209b AArch64: Improve codegen for SVE powf d3f2b71ef1 aarch64: Fix tests not compatible with targets supporting GCS f86b4cf875 AArch64: Improve codegen in SVE expm1f and users 140b985e5a AArch64: Improve codegen in AdvSIMD asinh 91c1fadba3 AArch64: Improve codegen for SVE log1pf users cff9648d0b AArch64: Improve codegen of AdvSIMD expf family 569cfaaf49 AArch64: Improve codegen in AdvSIMD pow ca0c0d0f26 AArch64: Improve codegen in users of ADVSIMD log1p helper 13a7ef5999 AArch64: Improve codegen in users of ADVSIMD expm1 helper 2d82d781a5 AArch64: Remove SVE erf and erfc tables 1cf29fbc5b AArch64: Small optimisation in AdvSIMD erf and erfc 7b8c134b54 AArch64: Improve codegen in SVE expf & related routines a15b1394b5 AArch64: Improve codegen in SVE F32 logs 5bc100bd4b AArch64: Improve codegen in users of AdvSIMD log1pf helper 7900ac490d AArch64: Improve codegen in users of ADVSIMD expm1f helper 0fed0b250f aarch64/fpu: Add vector variants of pow 
75207bde68 aarch64/fpu: Add vector variants of cbrt 157f89fa3d aarch64/fpu: Add vector variants of hypot 90a6ca8b28 aarch64: Fix AdvSIMD libmvec routines for big-endian 87cb1dfcd6 aarch64/fpu: Add vector variants of erfc 3d3a4fb8e4 aarch64/fpu: Add vector variants of tanh eedbbca0bf aarch64/fpu: Add vector variants of sinh 8b67920528 aarch64/fpu: Add vector variants of atanh 81406ea3c5 aarch64/fpu: Add vector variants of asinh b09fee1d21 aarch64/fpu: Add vector variants of acosh bdb5705b7b aarch64/fpu: Add vector variants of cosh cb5d84f1f8 aarch64/fpu: Add vector variants of erf Resolves: RHEL-118273
50 lines
2.3 KiB
Diff
50 lines
2.3 KiB
Diff
commit aac077645a645bba0d67f3250e82017c539d0f4b
|
|
Author: Pierre Blanchard <pierre.blanchard@arm.com>
|
|
Date: Wed Aug 20 17:41:50 2025 +0000
|
|
|
|
AArch64: Fix SVE powf routine [BZ #33299]
|
|
|
|
Fix a bug in the predicate logic introduced in the last change.
|
|
A slight performance improvement from relying on all true
|
|
predicates during conversion from single to double.
|
|
This fixes BZ #33299.
|
|
|
|
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
|
|
|
|
diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c
|
|
index 08d7019a1855ff3c..33bba96054cf4cc8 100644
|
|
--- a/sysdeps/aarch64/fpu/powf_sve.c
|
|
+++ b/sysdeps/aarch64/fpu/powf_sve.c
|
|
@@ -223,15 +223,15 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
|
|
const svbool_t ptrue = svptrue_b64 ();
|
|
|
|
/* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
|
|
- * in order to perform core computation in double precision. */
|
|
+ in order to perform core computation in double precision. */
|
|
const svbool_t pg_lo = svunpklo (pg);
|
|
const svbool_t pg_hi = svunpkhi (pg);
|
|
- svfloat64_t y_lo
|
|
- = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
|
|
- svfloat64_t y_hi
|
|
- = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
|
|
- svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
|
|
- svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
|
|
+ svfloat64_t y_lo = svcvt_f64_x (
|
|
+ ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
|
|
+ svfloat64_t y_hi = svcvt_f64_x (
|
|
+ ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
|
|
+ svfloat64_t z_lo = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpklo (iz)));
|
|
+ svfloat64_t z_hi = svcvt_f64_x (ptrue, svreinterpret_f32 (svunpkhi (iz)));
|
|
svuint64_t i_lo = svunpklo (i);
|
|
svuint64_t i_hi = svunpkhi (i);
|
|
svint64_t k_lo = svunpklo (k);
|
|
@@ -312,7 +312,7 @@ svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
|
|
(23 - V_POWF_EXP2_TABLE_BITS));
|
|
|
|
/* Compute core in extended precision and return intermediate ylogx results
|
|
- * to handle cases of underflow and underflow in exp. */
|
|
+ to handle cases of underflow and overflow in exp. */
|
|
svfloat32_t ylogx;
|
|
svfloat32_t ret
|
|
= sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
|