diff --git a/glibc-upstream-2.39-22.patch b/glibc-upstream-2.39-22.patch new file mode 100644 index 0000000..fb7281f --- /dev/null +++ b/glibc-upstream-2.39-22.patch @@ -0,0 +1,32 @@ +commit 31c7d69af59da0da80caa74b2ec6ae149013384d +Author: Florian Weimer +Date: Fri Feb 16 07:40:37 2024 +0100 + + i386: Use generic memrchr in libc (bug 31316) + + Before this change, we incorrectly used the SSE2 variant in the + implementation, without checking that the system actually supports + SSE2. + + Tested-by: Sam James + (cherry picked from commit 0d9166c2245cad4ac520b337dee40c9a583872b6) + +diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c +index ef7bbbe792afc78e..20bfdf3af35b66e4 100644 +--- a/sysdeps/i386/i686/multiarch/memrchr-c.c ++++ b/sysdeps/i386/i686/multiarch/memrchr-c.c +@@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t); + #endif + + #include "string/memrchr.c" ++strong_alias (__memrchr_ia32, __GI___memrchr) +diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S +index d9dae04171bfedff..e123f87435b3c6a6 100644 +--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S ++++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S +@@ -720,5 +720,4 @@ L(ret_null): + ret + + END (__memrchr_sse2) +-strong_alias (__memrchr_sse2, __GI___memrchr) + #endif diff --git a/glibc-upstream-2.39-23.patch b/glibc-upstream-2.39-23.patch new file mode 100644 index 0000000..11773cf --- /dev/null +++ b/glibc-upstream-2.39-23.patch @@ -0,0 +1,669 @@ +commit b0e0a07018098c2c5927796be5681a298c312626 +Author: Joe Ramsay +Date: Tue Feb 20 16:44:13 2024 +0000 + + aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR + + This includes a fix for big-endian in AdvSIMD log, some cosmetic + changes, and numerous small optimisations mainly around inlining and + using indexed variants of MLA intrinsics. + Reviewed-by: Adhemerval Zanella + + (cherry picked from commit e302e1021391d13a9611ba3a910df128830bd19e) + +diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c +index a8eabb5e7155360f..0a86c9823a279766 100644 +--- a/sysdeps/aarch64/fpu/acos_advsimd.c ++++ b/sysdeps/aarch64/fpu/acos_advsimd.c +@@ -40,8 +40,8 @@ static const struct data + }; + + #define AllMask v_u64 (0xffffffffffffffff) +-#define Oneu (0x3ff0000000000000) +-#define Small (0x3e50000000000000) /* 2^-53. */ ++#define Oneu 0x3ff0000000000000 ++#define Small 0x3e50000000000000 /* 2^-53. */ + + #if WANT_SIMD_EXCEPT + static float64x2_t VPCS_ATTR NOINLINE +diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c +index 141646e954aa8ba1..2de6eff40782bc79 100644 +--- a/sysdeps/aarch64/fpu/asin_advsimd.c ++++ b/sysdeps/aarch64/fpu/asin_advsimd.c +@@ -39,8 +39,8 @@ static const struct data + }; + + #define AllMask v_u64 (0xffffffffffffffff) +-#define One (0x3ff0000000000000) +-#define Small (0x3e50000000000000) /* 2^-12. */ ++#define One 0x3ff0000000000000 ++#define Small 0x3e50000000000000 /* 2^-12. */ + + #if WANT_SIMD_EXCEPT + static float64x2_t VPCS_ATTR NOINLINE +diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c +index 09a4c559b8afbe95..04fa71fa37c3de29 100644 +--- a/sysdeps/aarch64/fpu/atan2_sve.c ++++ b/sysdeps/aarch64/fpu/atan2_sve.c +@@ -37,9 +37,6 @@ static const struct data + .pi_over_2 = 0x1.921fb54442d18p+0, + }; + +-/* Useful constants. */ +-#define SignMask sv_u64 (0x8000000000000000) +- + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ + static svfloat64_t NOINLINE + special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, +@@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + svbool_t cmp_y = zeroinfnan (iy, pg); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + +- svuint64_t sign_x = svand_x (pg, ix, SignMask); +- svuint64_t sign_y = svand_x (pg, iy, SignMask); +- svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); +- + svfloat64_t ax = svabs_x (pg, x); + svfloat64_t ay = svabs_x (pg, y); ++ svuint64_t iax = svreinterpret_u64 (ax); ++ svuint64_t iay = svreinterpret_u64 (ay); ++ ++ svuint64_t sign_x = sveor_x (pg, ix, iax); ++ svuint64_t sign_y = sveor_x (pg, iy, iay); ++ svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); + +- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for call to atan. */ +@@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + svfloat64_t z = svdiv_x (pg, n, d); + + /* Work out the correct shift. */ +- svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0)); +- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); ++ svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); ++ shift = svsel (pred_aygtax, sv_f64 (1.0), shift); ++ shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); + shift = svmul_x (pg, shift, data_ptr->pi_over_2); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ +@@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + ret = svadd_m (pg, ret, shift); + + /* Account for the sign of x and y. */ +- ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); +- + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) +- return special_case (y, x, ret, cmp_xy); +- +- return ret; ++ return special_case ( ++ y, x, ++ svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)), ++ cmp_xy); ++ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); + } +diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c +index b92f83cdea4fb2da..9ea197147ccca0ac 100644 +--- a/sysdeps/aarch64/fpu/atan2f_sve.c ++++ b/sysdeps/aarch64/fpu/atan2f_sve.c +@@ -32,10 +32,8 @@ static const struct data + .pi_over_2 = 0x1.921fb6p+0f, + }; + +-#define SignMask sv_u32 (0x80000000) +- + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ +-static inline svfloat32_t ++static svfloat32_t NOINLINE + special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret, + const svbool_t cmp) + { +@@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + svbool_t cmp_y = zeroinfnan (iy, pg); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + +- svuint32_t sign_x = svand_x (pg, ix, SignMask); +- svuint32_t sign_y = svand_x (pg, iy, SignMask); +- svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); +- + svfloat32_t ax = svabs_x (pg, x); + svfloat32_t ay = svabs_x (pg, y); ++ svuint32_t iax = svreinterpret_u32 (ax); ++ svuint32_t iay = svreinterpret_u32 (ay); ++ ++ svuint32_t sign_x = sveor_x (pg, ix, iax); ++ svuint32_t sign_y = sveor_x (pg, iy, iay); ++ svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); + +- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for call to atan. */ +@@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + svfloat32_t z = svdiv_x (pg, n, d); + + /* Work out the correct shift. */ +- svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0)); +- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); ++ svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); ++ shift = svsel (pred_aygtax, sv_f32 (1.0), shift); ++ shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); + shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); + +- /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ ++ /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ + svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z4 = svmul_x (pg, z2, z2); + svfloat32_t z8 = svmul_x (pg, z4, z4); +@@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + ret = svadd_m (pg, ret, shift); + + /* Account for the sign of x and y. */ +- ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); + + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) +- return special_case (y, x, ret, cmp_xy); ++ return special_case ( ++ y, x, ++ svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)), ++ cmp_xy); + +- return ret; ++ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); + } +diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c +index 2897e8b909bbcb66..3924c9ce44c30d4d 100644 +--- a/sysdeps/aarch64/fpu/cos_advsimd.c ++++ b/sysdeps/aarch64/fpu/cos_advsimd.c +@@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) + special-case handler later. */ + r = vbslq_f64 (cmp, v_f64 (1.0), r); + #else +- cmp = vcageq_f64 (d->range_val, x); +- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f64 (x, d->range_val); + r = x; + #endif + +diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c +index 60abc8dfcfff9ed4..d0c285b03a8bfe22 100644 +--- a/sysdeps/aarch64/fpu/cosf_advsimd.c ++++ b/sysdeps/aarch64/fpu/cosf_advsimd.c +@@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) + special-case handler later. */ + r = vbslq_f32 (cmp, v_f32 (1.0f), r); + #else +- cmp = vcageq_f32 (d->range_val, x); +- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f32 (x, d->range_val); + r = x; + #endif + +diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c +index fe7149b19176da21..eeb31ca839c1f671 100644 +--- a/sysdeps/aarch64/fpu/exp10_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp10_advsimd.c +@@ -57,7 +57,7 @@ const static struct data + # define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */ + # define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */ + +-static inline float64x2_t VPCS_ATTR ++static float64x2_t VPCS_ATTR NOINLINE + special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + { + /* If fenv exceptions are to be triggered correctly, fall back to the scalar +@@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ + # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR NOINLINE ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n, + const struct data *d) + { +diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c +index 7ee0c909487c32b7..ab117b69da23e5f3 100644 +--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c +@@ -25,7 +25,8 @@ + static const struct data + { + float32x4_t poly[5]; +- float32x4_t shift, log10_2, log2_10_hi, log2_10_lo; ++ float32x4_t log10_2_and_inv, shift; ++ + #if !WANT_SIMD_EXCEPT + float32x4_t scale_thresh; + #endif +@@ -38,9 +39,9 @@ static const struct data + .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), + V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, + .shift = V4 (0x1.8p23f), +- .log10_2 = V4 (0x1.a934fp+1), +- .log2_10_hi = V4 (0x1.344136p-2), +- .log2_10_lo = V4 (-0x1.ec10cp-27), ++ ++ /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */ ++ .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, + #if !WANT_SIMD_EXCEPT + .scale_thresh = V4 (ScaleBound) + #endif +@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) + #if WANT_SIMD_EXCEPT + /* asuint(x) - TinyBound >= BigBound - TinyBound. */ + uint32x4_t cmp = vcgeq_u32 ( +- vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), +- TinyBound), +- Thres); ++ vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres); + float32x4_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + special case handler to fix special lanes later. This is only necessary if + fenv exceptions are to be triggered correctly. */ + if (__glibc_unlikely (v_any_u32 (cmp))) +- x = vbslq_f32 (cmp, v_f32 (1), x); ++ x = v_zerofy_f32 (x, cmp); + #endif + + /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), + with poly(r) in [1/sqrt(2), sqrt(2)] and + x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */ +- float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2); ++ float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0); + float32x4_t n = vsubq_f32 (z, d->shift); +- float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi); +- r = vfmsq_f32 (r, n, d->log2_10_lo); ++ float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1); ++ r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); +diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c +index 391a93180c93427d..ae1e63d5030633d5 100644 +--- a/sysdeps/aarch64/fpu/exp2_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp2_advsimd.c +@@ -24,6 +24,7 @@ + #define IndexMask (N - 1) + #define BigBound 1022.0 + #define UOFlowBound 1280.0 ++#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ + + static const struct data + { +@@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i) + + #if WANT_SIMD_EXCEPT + +-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ + # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ + + /* Call scalar exp2 as a fallback. */ + static float64x2_t VPCS_ATTR NOINLINE +-special_case (float64x2_t x) ++special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) + { +- return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff)); ++ return v_call_f64 (exp2, x, y, is_special); + } + + #else +@@ -65,7 +65,7 @@ special_case (float64x2_t x) + # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ + # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n, + const struct data *d) + { +@@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) + #if WANT_SIMD_EXCEPT + uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); + cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); +- /* If any special case (inf, nan, small and large x) is detected, +- fall back to scalar for all lanes. */ +- if (__glibc_unlikely (v_any_u64 (cmp))) +- return special_case (x); ++ /* Mask special lanes and retain a copy of x for passing to special-case ++ handler. */ ++ float64x2_t xc = x; ++ x = v_zerofy_f64 (x, cmp); + #else + cmp = vcagtq_f64 (x, d->scale_big_bound); + #endif +@@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) + float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); + y = vmulq_f64 (r, y); + +-#if !WANT_SIMD_EXCEPT + if (__glibc_unlikely (v_any_u64 (cmp))) ++#if !WANT_SIMD_EXCEPT + return special_case (s, y, n, d); ++#else ++ return special_case (xc, vfmaq_f64 (s, s, y), cmp); + #endif + return vfmaq_f64 (s, s, y); + } +diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c +index 9a5a523a10280d1d..8a686e3e054cb7f5 100644 +--- a/sysdeps/aarch64/fpu/exp2f_sve.c ++++ b/sysdeps/aarch64/fpu/exp2f_sve.c +@@ -20,6 +20,8 @@ + #include "sv_math.h" + #include "poly_sve_f32.h" + ++#define Thres 0x1.5d5e2ap+6f ++ + static const struct data + { + float poly[5]; +@@ -33,7 +35,7 @@ static const struct data + .shift = 0x1.903f8p17f, + /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +- .thres = 0x1.5d5e2ap+6f, ++ .thres = Thres, + }; + + static svfloat32_t NOINLINE +diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c +index fd215f1d2c5e9eea..5e3a9a0d44a43656 100644 +--- a/sysdeps/aarch64/fpu/exp_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp_advsimd.c +@@ -54,7 +54,7 @@ const static volatile struct + # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */ + # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. */ + +-static inline float64x2_t VPCS_ATTR ++static float64x2_t VPCS_ATTR NOINLINE + special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + { + /* If fenv exceptions are to be triggered correctly, fall back to the scalar +@@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ + # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR NOINLINE ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n) + { + /* 2^(n/N) may overflow, break it up into s1*s2. */ +diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c +index 0b85bd06f3c10068..3628398674468131 100644 +--- a/sysdeps/aarch64/fpu/expm1_advsimd.c ++++ b/sysdeps/aarch64/fpu/expm1_advsimd.c +@@ -23,7 +23,7 @@ + static const struct data + { + float64x2_t poly[11]; +- float64x2_t invln2, ln2_lo, ln2_hi, shift; ++ float64x2_t invln2, ln2, shift; + int64x2_t exponent_bias; + #if WANT_SIMD_EXCEPT + uint64x2_t thresh, tiny_bound; +@@ -38,8 +38,7 @@ static const struct data + V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), + V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) }, + .invln2 = V2 (0x1.71547652b82fep0), +- .ln2_hi = V2 (0x1.62e42fefa39efp-1), +- .ln2_lo = V2 (0x1.abc9e3b39803fp-56), ++ .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, + .shift = V2 (0x1.8p52), + .exponent_bias = V2 (0x3ff0000000000000), + #if WANT_SIMD_EXCEPT +@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) + x = v_zerofy_f64 (x, special); + #else + /* Large input, NaNs and Infs. */ +- uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound)); ++ uint64x2_t special = vcageq_f64 (x, d->oflow_bound); + #endif + + /* Reduce argument to smaller range: +@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) + where 2^i is exact because i is an integer. */ + float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift); + int64x2_t i = vcvtq_s64_f64 (n); +- float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi); +- f = vfmsq_f64 (f, n, d->ln2_lo); ++ float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0); ++ f = vfmsq_laneq_f64 (f, n, d->ln2, 1); + + /* Approximate expm1(f) using polynomial. + Taylor expansion for expm1(x) has the form: +diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c +index 8d4c9a21936d31dd..93db200f618379be 100644 +--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c ++++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c +@@ -23,7 +23,8 @@ + static const struct data + { + float32x4_t poly[5]; +- float32x4_t invln2, ln2_lo, ln2_hi, shift; ++ float32x4_t invln2_and_ln2; ++ float32x4_t shift; + int32x4_t exponent_bias; + #if WANT_SIMD_EXCEPT + uint32x4_t thresh; +@@ -34,9 +35,8 @@ static const struct data + /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ + .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), + V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, +- .invln2 = V4 (0x1.715476p+0f), +- .ln2_hi = V4 (0x1.62e4p-1f), +- .ln2_lo = V4 (0x1.7f7d1cp-20f), ++ /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ ++ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, + .shift = V4 (0x1.8p23f), + .exponent_bias = V4 (0x3f800000), + #if !WANT_SIMD_EXCEPT +@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) + x = v_zerofy_f32 (x, special); + #else + /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ +- uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound)); ++ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); + #endif + + /* Reduce argument to smaller range: +@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) + and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. + exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 + where 2^i is exact because i is an integer. */ +- float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift); ++ float32x4_t j = vsubq_f32 ( ++ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); + int32x4_t i = vcvtq_s32_f32 (j); +- float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi); +- f = vfmsq_f32 (f, j, d->ln2_lo); ++ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); ++ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); + + /* Approximate expm1(f) using polynomial. + Taylor expansion for expm1(x) has the form: +diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c +index 067ae7961300c66b..21df61728ca87374 100644 +--- a/sysdeps/aarch64/fpu/log_advsimd.c ++++ b/sysdeps/aarch64/fpu/log_advsimd.c +@@ -58,8 +58,13 @@ lookup (uint64x2_t i) + uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); ++#if __BYTE_ORDER == __LITTLE_ENDIAN + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); ++#else ++ e.invc = vuzp1q_f64 (e1, e0); ++ e.logc = vuzp2q_f64 (e1, e0); ++#endif + return e; + } + +diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c +index efce183e86e319f1..a0d9d3b81965db76 100644 +--- a/sysdeps/aarch64/fpu/sin_advsimd.c ++++ b/sysdeps/aarch64/fpu/sin_advsimd.c +@@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) + r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + #else + r = x; +- cmp = vcageq_f64 (d->range_val, x); +- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f64 (x, d->range_val); + #endif + + /* n = rint(|x|/pi). */ +diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c +index 60cf3f2ca102619c..375dfc3331fa6a9c 100644 +--- a/sysdeps/aarch64/fpu/sinf_advsimd.c ++++ b/sysdeps/aarch64/fpu/sinf_advsimd.c +@@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) + r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + #else + r = x; +- cmp = vcageq_f32 (d->range_val, x); +- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f32 (x, d->range_val); + #endif + + /* n = rint(|x|/pi) */ +diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c +index d7e5ba7b1ab941e8..0459821ab25487a8 100644 +--- a/sysdeps/aarch64/fpu/tan_advsimd.c ++++ b/sysdeps/aarch64/fpu/tan_advsimd.c +@@ -23,7 +23,7 @@ + static const struct data + { + float64x2_t poly[9]; +- float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift; ++ float64x2_t half_pi, two_over_pi, shift; + #if !WANT_SIMD_EXCEPT + float64x2_t range_val; + #endif +@@ -34,8 +34,7 @@ static const struct data + V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9), + V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11), + V2 (0x1.4e4fd14147622p-12) }, +- .half_pi_hi = V2 (0x1.921fb54442d18p0), +- .half_pi_lo = V2 (0x1.1a62633145c07p-54), ++ .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, + .two_over_pi = V2 (0x1.45f306dc9c883p-1), + .shift = V2 (0x1.8p52), + #if !WANT_SIMD_EXCEPT +@@ -56,15 +55,15 @@ special_case (float64x2_t x) + + /* Vector approximation for double-precision tan. + Maximum measured error is 3.48 ULP: +- __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 +- want -0x1.f6ccd8ecf7deap+37. */ ++ _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 ++ want -0x1.f6ccd8ecf7deap+37. */ + float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + { + const struct data *dat = ptr_barrier (&data); +- /* Our argument reduction cannot calculate q with sufficient accuracy for very +- large inputs. Fall back to scalar routine for all lanes if any are too +- large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny +- input to avoid underflow. */ ++ /* Our argument reduction cannot calculate q with sufficient accuracy for ++ very large inputs. Fall back to scalar routine for all lanes if any are ++ too large, or Inf/NaN. If fenv exceptions are expected, also fall back for ++ tiny input to avoid underflow. */ + #if WANT_SIMD_EXCEPT + uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); + /* iax - tiny_bound > range_val - tiny_bound. */ +@@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + /* Use q to reduce x to r in [-pi/4, pi/4], by: + r = x - q * pi/2, in extended precision. */ + float64x2_t r = x; +- r = vfmsq_f64 (r, q, dat->half_pi_hi); +- r = vfmsq_f64 (r, q, dat->half_pi_lo); ++ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); ++ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); + /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle + formula. */ + r = vmulq_n_f64 (r, 0.5); +@@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + and reciprocity around pi/2: + tan(x) = 1 / (tan(pi/2 - x)) + to assemble result using change-of-sign and conditional selection of +- numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */ ++ numerator/denominator, dependent on odd/even-ness of q (hence quadrant). ++ */ + float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p); + float64x2_t d = vaddq_f64 (p, p); + + uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); + + #if !WANT_SIMD_EXCEPT +- uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val)); ++ uint64x2_t special = vcageq_f64 (x, dat->range_val); + if (__glibc_unlikely (v_any_u64 (special))) + return special_case (x); + #endif +diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c +index 1f16103f8a7668f8..5a7489390a9692c6 100644 +--- a/sysdeps/aarch64/fpu/tanf_advsimd.c ++++ b/sysdeps/aarch64/fpu/tanf_advsimd.c +@@ -23,7 +23,8 @@ + static const struct data + { + float32x4_t poly[6]; +- float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift; ++ float32x4_t pi_consts; ++ float32x4_t shift; + #if !WANT_SIMD_EXCEPT + float32x4_t range_val; + #endif +@@ -31,10 +32,9 @@ static const struct data + /* Coefficients generated using FPMinimax. */ + .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f), + V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) }, +- .neg_half_pi_1 = V4 (-0x1.921fb6p+0f), +- .neg_half_pi_2 = V4 (0x1.777a5cp-25f), +- .neg_half_pi_3 = V4 (0x1.ee59dap-50f), +- .two_over_pi = V4 (0x1.45f306p-1f), ++ /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */ ++ .pi_consts ++ = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f }, + .shift = V4 (0x1.8p+23f), + #if !WANT_SIMD_EXCEPT + .range_val = V4 (0x1p15f), +@@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d) + { + float32x4_t z2 = vmulq_f32 (z, z); + #if WANT_SIMD_EXCEPT +- /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions +- are to be triggered correctly, sidestep this by fixing such lanes to 0. */ ++ /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. ++ If fp exceptions are to be triggered correctly, ++ sidestep this by fixing such lanes to 0. */ + uint32x4_t will_uflow +- = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); ++ = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); + if (__glibc_unlikely (v_any_u32 (will_uflow))) + z2 = vbslq_f32 (will_uflow, v_f32 (0), z2); + #endif +@@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) + #endif + + /* n = rint(x/(pi/2)). */ +- float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x); ++ float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3); + float32x4_t n = vsubq_f32 (q, d->shift); + /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ + uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1)); + + /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). */ + float32x4_t r; +- r = vfmaq_f32 (x, d->neg_half_pi_1, n); +- r = vfmaq_f32 (r, d->neg_half_pi_2, n); +- r = vfmaq_f32 (r, d->neg_half_pi_3, n); ++ r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0); ++ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1); ++ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2); + + /* If x lives in an interval, where |tan(x)| + - is finite, then use a polynomial approximation of the form diff --git a/glibc-upstream-2.39-24.patch b/glibc-upstream-2.39-24.patch new file mode 100644 index 0000000..61d3f48 --- /dev/null +++ b/glibc-upstream-2.39-24.patch @@ -0,0 +1,54 @@ +commit 395a89f61e19fa916ae4cc93fc10d81a28ce3039 +Author: Szabolcs Nagy +Date: Wed Mar 13 14:34:14 2024 +0000 + + aarch64: fix check for SVE support in assembler + + Due to GCC bug 110901 -mcpu can override -march setting when compiling + asm code and thus a compiler targetting a specific cpu can fail the + configure check even when binutils gas supports SVE. + + The workaround is that explicit .arch directive overrides both -mcpu + and -march, and since that's what the actual SVE memcpy uses the + configure check should use that too even if the GCC issue is fixed + independently. + + Reviewed-by: Florian Weimer + (cherry picked from commit 73c26018ed0ecd9c807bb363cc2c2ab4aca66a82) + +diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure +old mode 100644 +new mode 100755 +index ca57edce472e4507..9606137e8ddae545 +--- a/sysdeps/aarch64/configure ++++ b/sysdeps/aarch64/configure +@@ -325,9 +325,10 @@ then : + printf %s "(cached) " >&6 + else $as_nop + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5' ++if { ac_try='${CC-cc} -c conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac +index 27874eceb44911e4..56d12d661d726892 100644 +--- a/sysdeps/aarch64/configure.ac ++++ b/sysdeps/aarch64/configure.ac +@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) + # Check if asm support armv8.2-a+sve + AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then ++if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_aarch64_sve_asm=yes + else + libc_cv_aarch64_sve_asm=no diff --git a/glibc-upstream-2.39-25.patch b/glibc-upstream-2.39-25.patch new file mode 100644 index 0000000..f168f15 --- /dev/null +++ b/glibc-upstream-2.39-25.patch @@ -0,0 +1,144 @@ +commit 9d92452c70805a2e2dbbdb2b1ffc34bd86e1c8df +Author: Wilco Dijkstra +Date: Thu Mar 21 16:48:33 2024 +0000 + + AArch64: Check kernel version for SVE ifuncs + + Old Linux kernels disable SVE after every system call. Calling the + SVE-optimized memcpy afterwards will then cause a trap to reenable SVE. + As a result, applications with a high use of syscalls may run slower with + the SVE memcpy. This is true for kernels between 4.15.0 and before 6.2.0, + except for 5.14.0 which was patched. Avoid this by checking the kernel + version and selecting the SVE ifunc on modern kernels. + + Parse the kernel version reported by uname() into a 24-bit kernel.major.minor + value without calling any library functions. If uname() is not supported or + if the version format is not recognized, assume the kernel is modern. + + Tested-by: Florian Weimer + Reviewed-by: Szabolcs Nagy + (cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2) + +diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h +index 77a782422af1b6e4..5f2da91ebbd0adaf 100644 +--- a/sysdeps/aarch64/cpu-features.h ++++ b/sysdeps/aarch64/cpu-features.h +@@ -71,6 +71,7 @@ struct cpu_features + /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ + uint8_t mte_state; + bool sve; ++ bool prefer_sve_ifuncs; + bool mops; + }; + +diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h +index c52860efb22d70eb..61dc40088f4d9e5e 100644 +--- a/sysdeps/aarch64/multiarch/init-arch.h ++++ b/sysdeps/aarch64/multiarch/init-arch.h +@@ -36,5 +36,7 @@ + MTE_ENABLED (); \ + bool __attribute__((unused)) sve = \ + GLRO(dl_aarch64_cpu_features).sve; \ ++ bool __attribute__((unused)) prefer_sve_ifuncs = \ ++ GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \ + bool __attribute__((unused)) mops = \ + GLRO(dl_aarch64_cpu_features).mops; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index d12eccfca51f4bcf..ce53567dab33c2f0 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -47,7 +47,7 @@ select_memcpy_ifunc (void) + { + if (IS_A64FX (midr)) + return __memcpy_a64fx; +- return __memcpy_sve; ++ return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index 2081eeb4d40e0240..fe95037be391896c 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -47,7 +47,7 @@ select_memmove_ifunc (void) + { + if (IS_A64FX (midr)) + return __memmove_a64fx; +- return __memmove_sve; ++ return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index b1a3f673f067280b..c0b047bc0dbeae42 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + + #define DCZID_DZP_MASK (1 << 4) +@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu) + return UINT64_MAX; + } + ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ ++/* Return true if we prefer using SVE in string ifuncs. Old kernels disable ++ SVE after every system call which results in unnecessary traps if memcpy ++ uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except ++ for 5.14.0 which was patched. For these versions return false to avoid using ++ SVE ifuncs. ++ Parse the kernel version into a 24-bit kernel.major.minor value without ++ calling any library functions. If uname() is not supported or if the version ++ format is not recognized, assume the kernel is modern and return true. */ ++ ++static inline bool ++prefer_sve_ifuncs (void) ++{ ++ struct utsname buf; ++ const char *p = &buf.release[0]; ++ int kernel = 0; ++ int val; ++ ++ if (__uname (&buf) < 0) ++ return true; ++ ++ for (int shift = 16; shift >= 0; shift -= 8) ++ { ++ for (val = 0; *p >= '0' && *p <= '9'; p++) ++ val = val * 10 + *p - '0'; ++ kernel |= (val & 255) << shift; ++ if (*p++ != '.') ++ break; ++ } ++ ++ if (kernel >= 0x060200 || kernel == 0x050e00) ++ return true; ++ if (kernel >= 0x040f00) ++ return false; ++ return true; ++} ++ ++#endif ++ + static inline void + init_cpu_features (struct cpu_features *cpu_features) + { +@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features) + /* Check if SVE is supported. */ + cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + ++ cpu_features->prefer_sve_ifuncs = cpu_features->sve; ++ ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ if (cpu_features->sve) ++ cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs (); ++#endif ++ + /* Check if MOPS is supported. */ + cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; + } diff --git a/glibc-upstream-2.39-26.patch b/glibc-upstream-2.39-26.patch new file mode 100644 index 0000000..278d238 --- /dev/null +++ b/glibc-upstream-2.39-26.patch @@ -0,0 +1,108 @@ +commit 9883f4304cfb1558d0f1e6d9f48c4ab0a35355fe +Author: H.J. Lu +Date: Wed Feb 28 09:51:14 2024 -0800 + + x86-64: Don't use SSE resolvers for ISA level 3 or above + + When glibc is built with ISA level 3 or above enabled, SSE resolvers + aren't available and glibc fails to build: + + ld: .../elf/librtld.os: in function `init_cpu_features': + .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave' + ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object + /usr/local/bin/ld: final link failed: bad value + + For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor + _dl_tlsdesc_dynamic_fxsave. + + This fixes BZ #31429. + Reviewed-by: Noah Goldstein + + (cherry picked from commit befe2d3c4dec8be2cdd01a47132e47bdb7020922) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 6fe1b728c607f39e..b8abe733abe54fc4 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -1198,7 +1199,9 @@ no_cpuid: + TUNABLE_CALLBACK (set_x86_shstk)); + #endif + ++#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL + if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) ++#endif + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + { +@@ -1219,22 +1222,24 @@ no_cpuid: + #endif + } + } ++#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL + else + { +-#ifdef __x86_64__ ++# ifdef __x86_64__ + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; +-# ifdef SHARED ++# ifdef SHARED + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; +-# endif +-#else +-# ifdef SHARED ++# endif ++# else ++# ifdef SHARED + if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; + else + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; ++# endif + # endif +-#endif + } ++#endif + + #ifdef SHARED + # ifdef __x86_64__ +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index ea69f5223a77e0c0..057a10862afd6208 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include "tlsdesc.h" + #include "dl-trampoline-save.h" + +@@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak: + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + #ifdef SHARED +-# define USE_FXSAVE +-# define STATE_SAVE_ALIGNMENT 16 +-# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave +-# include "dl-tlsdesc-dynamic.h" +-# undef _dl_tlsdesc_dynamic +-# undef USE_FXSAVE ++# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_FXSAVE ++# endif + + # define USE_XSAVE + # define STATE_SAVE_ALIGNMENT 64 diff --git a/glibc-upstream-2.39-27.patch b/glibc-upstream-2.39-27.patch new file mode 100644 index 0000000..8f136ce --- /dev/null +++ b/glibc-upstream-2.39-27.patch @@ -0,0 +1,61 @@ +commit 7b92f46f04c6cbce19d19ae1099628431858996c +Author: Sunil K Pandey +Date: Thu Feb 29 17:57:02 2024 -0800 + + x86-64: Simplify minimum ISA check ifdef conditional with if + + Replace minimum ISA check ifdef conditional with if. Since + MINIMUM_X86_ISA_LEVEL and AVX_X86_ISA_LEVEL are compile time constants, + compiler will perform constant folding optimization, getting same + results. + + Reviewed-by: H.J. Lu + (cherry picked from commit b6e3898194bbae78910bbe9cd086937014961e45) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index b8abe733abe54fc4..3d7c2819d7cc6643 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -1199,9 +1199,8 @@ no_cpuid: + TUNABLE_CALLBACK (set_x86_shstk)); + #endif + +-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL +- if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) +-#endif ++ if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL ++ || (GLRO(dl_x86_cpu_features).xsave_state_size != 0)) + { + if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) + { +@@ -1222,24 +1221,22 @@ no_cpuid: + #endif + } + } +-#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL + else + { +-# ifdef __x86_64__ ++#ifdef __x86_64__ + GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; +-# ifdef SHARED ++# ifdef SHARED + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; +-# endif +-# else +-# ifdef SHARED ++# endif ++#else ++# ifdef SHARED + if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; + else + GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; +-# endif + # endif +- } + #endif ++ } + + #ifdef SHARED + # ifdef __x86_64__ diff --git a/glibc-upstream-2.39-28.patch b/glibc-upstream-2.39-28.patch new file mode 100644 index 0000000..b5546e0 --- /dev/null +++ b/glibc-upstream-2.39-28.patch @@ -0,0 +1,50 @@ +commit edb9a76e3008725e9dc035d38a58e849a3bde0f1 +Author: Florian Weimer +Date: Sun Apr 14 08:24:51 2024 +0200 + + powerpc: Fix ld.so address determination for PCREL mode (bug 31640) + + This seems to have stopped working with some GCC 14 versions, + which clobber r2. With other compilers, the kernel-provided + r2 value is still available at this point. + + Reviewed-by: Peter Bergner + (cherry picked from commit 14e56bd4ce15ac2d1cc43f762eb2e6b83fec1afe) + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index c6682f3445615636..2b6f5d2b08cb10b8 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr) + static inline Elf64_Addr + elf_machine_load_address (void) __attribute__ ((const)); + ++#ifndef __PCREL__ + static inline Elf64_Addr + elf_machine_load_address (void) + { +@@ -105,6 +106,24 @@ elf_machine_dynamic (void) + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; + } ++#else /* __PCREL__ */ ++/* In PCREL mode, r2 may have been clobbered. Rely on relative ++ relocations instead. */ ++ ++static inline ElfW(Addr) ++elf_machine_load_address (void) ++{ ++ extern const ElfW(Ehdr) __ehdr_start attribute_hidden; ++ return (ElfW(Addr)) &__ehdr_start; ++} ++ ++static inline ElfW(Addr) ++elf_machine_dynamic (void) ++{ ++ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; ++ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); ++} ++#endif /* __PCREL__ */ + + /* The PLT uses Elf64_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela diff --git a/glibc-upstream-2.39-29.patch b/glibc-upstream-2.39-29.patch new file mode 100644 index 0000000..41a85b1 --- /dev/null +++ b/glibc-upstream-2.39-29.patch @@ -0,0 +1,246 @@ +commit 04df8652eb1919da18d54b3dcd6db1675993d45d +Author: H.J. Lu +Date: Thu Feb 15 11:19:56 2024 -0800 + + Apply the Makefile sorting fix + + Apply the Makefile sorting fix generated by sort-makefile-lines.py. + + (cherry picked from commit ef7f4b1fef67430a8f3cfc77fa6aada2add851d7) + +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index fe863e1ba411cc4b..01762ef526854d54 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -1,52 +1,52 @@ + ifeq ($(subdir),string) + sysdep_routines += \ +- strlen-aligned \ +- strlen-lsx \ +- strlen-lasx \ +- strnlen-aligned \ +- strnlen-lsx \ +- strnlen-lasx \ ++ memchr-aligned \ ++ memchr-lasx \ ++ memchr-lsx \ ++ memcmp-aligned \ ++ memcmp-lasx \ ++ memcmp-lsx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-lasx \ ++ memmove-lsx \ ++ memmove-unaligned \ ++ memrchr-generic \ ++ memrchr-lasx \ ++ memrchr-lsx \ ++ memset-aligned \ ++ memset-lasx \ ++ memset-lsx \ ++ memset-unaligned \ ++ rawmemchr-aligned \ ++ rawmemchr-lasx \ ++ rawmemchr-lsx \ ++ stpcpy-aligned \ ++ stpcpy-lasx \ ++ stpcpy-lsx \ ++ stpcpy-unaligned \ + strchr-aligned \ +- strchr-lsx \ + strchr-lasx \ +- strrchr-aligned \ +- strrchr-lsx \ +- strrchr-lasx \ ++ strchr-lsx \ + strchrnul-aligned \ +- strchrnul-lsx \ + strchrnul-lasx \ ++ strchrnul-lsx \ + strcmp-aligned \ + strcmp-lsx \ +- strncmp-aligned \ +- strncmp-lsx \ + strcpy-aligned \ +- strcpy-unaligned \ +- strcpy-lsx \ + strcpy-lasx \ +- stpcpy-aligned \ +- stpcpy-unaligned \ +- stpcpy-lsx \ +- stpcpy-lasx \ +- memcpy-aligned \ +- memcpy-unaligned \ +- memmove-unaligned \ +- memmove-lsx \ +- memmove-lasx \ +- rawmemchr-aligned \ +- rawmemchr-lsx \ +- rawmemchr-lasx \ +- memchr-aligned \ +- memchr-lsx \ +- memchr-lasx \ +- memrchr-generic \ +- memrchr-lsx \ +- memrchr-lasx \ +- memset-aligned \ +- memset-unaligned \ +- memset-lsx \ +- memset-lasx \ +- memcmp-aligned \ +- memcmp-lsx \ +- memcmp-lasx \ ++ strcpy-lsx \ ++ strcpy-unaligned \ ++ strlen-aligned \ ++ strlen-lasx \ ++ strlen-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ ++ strnlen-aligned \ ++ strnlen-lasx \ ++ strnlen-lsx \ ++ strrchr-aligned \ ++ strrchr-lasx \ ++ strrchr-lsx \ + # sysdep_routines + endif +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index 992aabe43ec60abf..5311b594aff62f7c 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags) + CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector) + + tests += \ +- tst-get-cpu-features \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features \ ++ tst-get-cpu-features-static \ + tst-hwcap-tunables \ + # tests + tests-static += \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features-static \ + # tests-static + ifeq (yes,$(have-ifunc)) + ifeq (yes,$(have-gcc-ifunc)) +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 9d374a329916fc45..0ede447405d549b5 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -252,6 +252,10 @@ sysdep-dl-routines += dl-cet + + tests += \ + tst-cet-legacy-1 \ ++ tst-cet-legacy-10 \ ++ tst-cet-legacy-10-static \ ++ tst-cet-legacy-10a \ ++ tst-cet-legacy-10a-static \ + tst-cet-legacy-1a \ + tst-cet-legacy-2 \ + tst-cet-legacy-2a \ +@@ -263,15 +267,11 @@ tests += \ + tst-cet-legacy-8 \ + tst-cet-legacy-9 \ + tst-cet-legacy-9-static \ +- tst-cet-legacy-10 \ +- tst-cet-legacy-10-static \ +- tst-cet-legacy-10a \ +- tst-cet-legacy-10a-static \ + # tests + tests-static += \ +- tst-cet-legacy-9-static \ + tst-cet-legacy-10-static \ + tst-cet-legacy-10a-static \ ++ tst-cet-legacy-9-static \ + # tests-static + tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd) + +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index ea81753b708fcb8d..e1a490dd98b4be07 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -4,10 +4,10 @@ libm-sysdep_routines += \ + s_ceilf-c \ + s_floor-c \ + s_floorf-c \ +- s_rint-c \ +- s_rintf-c \ + s_nearbyint-c \ + s_nearbyintf-c \ ++ s_rint-c \ ++ s_rintf-c \ + s_roundeven-c \ + s_roundevenf-c \ + s_trunc-c \ +@@ -21,10 +21,10 @@ libm-sysdep_routines += \ + s_floorf-sse4_1 \ + s_nearbyint-sse4_1 \ + s_nearbyintf-sse4_1 \ +- s_roundeven-sse4_1 \ +- s_roundevenf-sse4_1 \ + s_rint-sse4_1 \ + s_rintf-sse4_1 \ ++ s_roundeven-sse4_1 \ ++ s_roundevenf-sse4_1 \ + s_trunc-sse4_1 \ + s_truncf-sse4_1 \ + # libm-sysdep_routines +@@ -84,12 +84,12 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2 + CFLAGS-s_sincosf-fma.c = -mfma -mavx2 + + libm-sysdep_routines += \ ++ e_asin-fma4 \ ++ e_atan2-fma4 \ + e_exp-fma4 \ + e_log-fma4 \ + e_pow-fma4 \ +- e_asin-fma4 \ + s_atan-fma4 \ +- e_atan2-fma4 \ + s_sin-fma4 \ + s_sincos-fma4 \ + s_tan-fma4 \ +@@ -106,10 +106,10 @@ CFLAGS-s_tan-fma4.c = -mfma4 + CFLAGS-s_sincos-fma4.c = -mfma4 + + libm-sysdep_routines += \ ++ e_atan2-avx \ + e_exp-avx \ + e_log-avx \ + s_atan-avx \ +- e_atan2-avx \ + s_sin-avx \ + s_sincos-avx \ + s_tan-avx \ +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index e1e894c963afb8b1..d3d2270394bd635d 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -4,8 +4,8 @@ sysdep_routines += \ + memchr-avx2 \ + memchr-avx2-rtm \ + memchr-evex \ +- memchr-evex512 \ + memchr-evex-rtm \ ++ memchr-evex512 \ + memchr-sse2 \ + memcmp-avx2-movbe \ + memcmp-avx2-movbe-rtm \ +@@ -37,8 +37,8 @@ sysdep_routines += \ + rawmemchr-avx2 \ + rawmemchr-avx2-rtm \ + rawmemchr-evex \ +- rawmemchr-evex512 \ + rawmemchr-evex-rtm \ ++ rawmemchr-evex512 \ + rawmemchr-sse2 \ + stpcpy-avx2 \ + stpcpy-avx2-rtm \ diff --git a/glibc-upstream-2.39-30.patch b/glibc-upstream-2.39-30.patch new file mode 100644 index 0000000..3796da6 --- /dev/null +++ b/glibc-upstream-2.39-30.patch @@ -0,0 +1,2143 @@ +commit 423099a03264ea28298f47355d7811b8efe03c97 +Author: Sunil K Pandey +Date: Tue Feb 13 12:23:14 2024 -0800 + + x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch + + When glibc is built with ISA level 3 or higher by default, the resulting + glibc binaries won't run on SSE or FMA4 processors. Exclude SSE, AVX and + FMA4 variants in libm multiarch when ISA level 3 or higher is enabled by + default. + + When glibc is built with ISA level 2 enabled by default, only keep SSE4.1 + variant. + + Fixes BZ 31335. + + NB: elf/tst-valgrind-smoke test fails with ISA level 4, because valgrind + doesn't support AVX512 instructions: + + https://bugs.kde.org/show_bug.cgi?id=383010 + + Reviewed-by: H.J. Lu + (cherry picked from commit 9f78a7c1d0963282608da836b840f0d5ae1c478e) + +diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure +index 1f4c2d67fd2693ed..2a5421bb31e9efe4 100644 +--- a/sysdeps/x86/configure ++++ b/sysdeps/x86/configure +@@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; } + if test $libc_cv_have_x86_lahf_sahf = yes; then + printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h + ++ ISAFLAG="-DHAVE_X86_LAHF_SAHF" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5 + printf %s "checking for MOVBE instruction support... " >&6; } +@@ -120,9 +121,41 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; } + if test $libc_cv_have_x86_movbe = yes; then + printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h + ++ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" + fi ++ ++ # Check for ISA level support. ++ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5 ++printf %s "checking for ISA level support... " >&6; } ++if test ${libc_cv_have_x86_isa_level+y} ++then : ++ printf %s "(cached) " >&6 ++else $as_nop ++ cat > conftest.c < ++#if MINIMUM_X86_ISA_LEVEL >= 4 ++libc_cv_have_x86_isa_level=4 ++#elif MINIMUM_X86_ISA_LEVEL == 3 ++libc_cv_have_x86_isa_level=3 ++#elif MINIMUM_X86_ISA_LEVEL == 2 ++libc_cv_have_x86_isa_level=2 ++#else ++libc_cv_have_x86_isa_level=baseline ++#endif ++EOF ++ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` ++ rm -rf conftest* ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5 ++printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; } ++else ++ libc_cv_have_x86_isa_level=baseline + fi + config_vars="$config_vars ++have-x86-isa-level = $libc_cv_have_x86_isa_level" ++config_vars="$config_vars ++x86-isa-level-3-or-above = 3 4" ++config_vars="$config_vars + enable-x86-isa-level = $libc_cv_include_x86_isa_level" + + printf "%s\n" "#define SUPPORT_STATIC_PIE 1" >>confdefs.h +diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac +index 437a50623b35163a..78ff7c8f41c552bc 100644 +--- a/sysdeps/x86/configure.ac ++++ b/sysdeps/x86/configure.ac +@@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then + fi]) + if test $libc_cv_have_x86_lahf_sahf = yes; then + AC_DEFINE(HAVE_X86_LAHF_SAHF) ++ ISAFLAG="-DHAVE_X86_LAHF_SAHF" + fi + AC_CACHE_CHECK([for MOVBE instruction support], + libc_cv_have_x86_movbe, [dnl +@@ -81,8 +82,31 @@ if test $libc_cv_include_x86_isa_level = yes; then + fi]) + if test $libc_cv_have_x86_movbe = yes; then + AC_DEFINE(HAVE_X86_MOVBE) ++ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" + fi ++ ++ # Check for ISA level support. ++ AC_CACHE_CHECK([for ISA level support], ++ libc_cv_have_x86_isa_level, [dnl ++cat > conftest.c < ++#if MINIMUM_X86_ISA_LEVEL >= 4 ++libc_cv_have_x86_isa_level=4 ++#elif MINIMUM_X86_ISA_LEVEL == 3 ++libc_cv_have_x86_isa_level=3 ++#elif MINIMUM_X86_ISA_LEVEL == 2 ++libc_cv_have_x86_isa_level=2 ++#else ++libc_cv_have_x86_isa_level=baseline ++#endif ++EOF ++ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` ++ rm -rf conftest*]) ++else ++ libc_cv_have_x86_isa_level=baseline + fi ++LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) ++LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) + LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) + + dnl Static PIE is supported. +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index e1a490dd98b4be07..6ddd50240ce33d22 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -1,49 +1,4 @@ + ifeq ($(subdir),math) +-libm-sysdep_routines += \ +- s_ceil-c \ +- s_ceilf-c \ +- s_floor-c \ +- s_floorf-c \ +- s_nearbyint-c \ +- s_nearbyintf-c \ +- s_rint-c \ +- s_rintf-c \ +- s_roundeven-c \ +- s_roundevenf-c \ +- s_trunc-c \ +- s_truncf-c \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- s_ceil-sse4_1 \ +- s_ceilf-sse4_1 \ +- s_floor-sse4_1 \ +- s_floorf-sse4_1 \ +- s_nearbyint-sse4_1 \ +- s_nearbyintf-sse4_1 \ +- s_rint-sse4_1 \ +- s_rintf-sse4_1 \ +- s_roundeven-sse4_1 \ +- s_roundevenf-sse4_1 \ +- s_trunc-sse4_1 \ +- s_truncf-sse4_1 \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- e_asin-fma \ +- e_atan2-fma \ +- e_exp-fma \ +- e_log-fma \ +- e_log2-fma \ +- e_pow-fma \ +- s_atan-fma \ +- s_expm1-fma \ +- s_log1p-fma \ +- s_sin-fma \ +- s_sincos-fma \ +- s_tan-fma \ +-# libm-sysdep_routines +- + CFLAGS-e_asin-fma.c = -mfma -mavx2 + CFLAGS-e_atan2-fma.c = -mfma -mavx2 + CFLAGS-e_exp-fma.c = -mfma -mavx2 +@@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2 + CFLAGS-s_tan-fma.c = -mfma -mavx2 + CFLAGS-s_sincos-fma.c = -mfma -mavx2 + +-libm-sysdep_routines += \ +- s_cosf-sse2 \ +- s_sincosf-sse2 \ +- s_sinf-sse2 \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- e_exp2f-fma \ +- e_expf-fma \ +- e_log2f-fma \ +- e_logf-fma \ +- e_powf-fma \ +- s_cosf-fma \ +- s_sincosf-fma \ +- s_sinf-fma \ +-# libm-sysdep_routines +- + CFLAGS-e_exp2f-fma.c = -mfma -mavx2 + CFLAGS-e_expf-fma.c = -mfma -mavx2 + CFLAGS-e_log2f-fma.c = -mfma -mavx2 +@@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2 + CFLAGS-s_cosf-fma.c = -mfma -mavx2 + CFLAGS-s_sincosf-fma.c = -mfma -mavx2 + ++# Check if ISA level is 3 or above. ++ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above))) + libm-sysdep_routines += \ ++ s_ceil-avx \ ++ s_ceilf-avx \ ++ s_floor-avx \ ++ s_floorf-avx \ ++ s_nearbyint-avx \ ++ s_nearbyintf-avx \ ++ s_rint-avx \ ++ s_rintf-avx \ ++ s_roundeven-avx \ ++ s_roundevenf-avx \ ++ s_trunc-avx \ ++ s_truncf-avx \ ++# libm-sysdep_routines ++else ++libm-sysdep_routines += \ ++ e_asin-fma \ + e_asin-fma4 \ ++ e_atan2-avx \ ++ e_atan2-fma \ + e_atan2-fma4 \ ++ e_exp-avx \ ++ e_exp-fma \ + e_exp-fma4 \ ++ e_exp2f-fma \ ++ e_expf-fma \ ++ e_log-avx \ ++ e_log-fma \ + e_log-fma4 \ ++ e_log2-fma \ ++ e_log2f-fma \ ++ e_logf-fma \ ++ e_pow-fma \ + e_pow-fma4 \ ++ e_powf-fma \ ++ s_atan-avx \ ++ s_atan-fma \ + s_atan-fma4 \ ++ s_ceil-sse4_1 \ ++ s_ceilf-sse4_1 \ ++ s_cosf-fma \ ++ s_cosf-sse2 \ ++ s_expm1-fma \ ++ s_floor-sse4_1 \ ++ s_floorf-sse4_1 \ ++ s_log1p-fma \ ++ s_nearbyint-sse4_1 \ ++ s_nearbyintf-sse4_1 \ ++ s_rint-sse4_1 \ ++ s_rintf-sse4_1 \ ++ s_roundeven-sse4_1 \ ++ s_roundevenf-sse4_1 \ ++ s_sin-avx \ ++ s_sin-fma \ + s_sin-fma4 \ ++ s_sincos-avx \ ++ s_sincos-fma \ + s_sincos-fma4 \ ++ s_sincosf-fma \ ++ s_sincosf-sse2 \ ++ s_sinf-fma \ ++ s_sinf-sse2 \ ++ s_tan-avx \ ++ s_tan-fma \ + s_tan-fma4 \ ++ s_trunc-sse4_1 \ ++ s_truncf-sse4_1 \ + # libm-sysdep_routines ++ifeq ($(have-x86-isa-level),baseline) ++libm-sysdep_routines += \ ++ s_ceil-c \ ++ s_ceilf-c \ ++ s_floor-c \ ++ s_floorf-c \ ++ s_nearbyint-c \ ++ s_nearbyintf-c \ ++ s_rint-c \ ++ s_rintf-c \ ++ s_roundeven-c \ ++ s_roundevenf-c \ ++ s_trunc-c \ ++ s_truncf-c \ ++# libm-sysdep_routines ++endif ++endif + + CFLAGS-e_asin-fma4.c = -mfma4 + CFLAGS-e_atan2-fma4.c = -mfma4 +@@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4 + CFLAGS-s_tan-fma4.c = -mfma4 + CFLAGS-s_sincos-fma4.c = -mfma4 + +-libm-sysdep_routines += \ +- e_atan2-avx \ +- e_exp-avx \ +- e_log-avx \ +- s_atan-avx \ +- s_sin-avx \ +- s_sincos-avx \ +- s_tan-avx \ +-# libm-sysdep_routines +- + CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX + CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX + CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX +diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c +index 2eaa6c2c0490d8fc..d64fca2586d43f03 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c +@@ -16,26 +16,29 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_ieee754_asin (double); + extern double __redirect_ieee754_acos (double); + +-#define SYMBOL_NAME ieee754_asin +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME ieee754_asin ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_asin, __asin) + +-#undef SYMBOL_NAME +-#define SYMBOL_NAME ieee754_acos +-#include "ifunc-fma4.h" ++# undef SYMBOL_NAME ++# define SYMBOL_NAME ieee754_acos ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_acos, __acos) + +-#define __ieee754_acos __ieee754_acos_sse2 +-#define __ieee754_asin __ieee754_asin_sse2 ++# define __ieee754_acos __ieee754_acos_sse2 ++# define __ieee754_asin __ieee754_asin_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +index 17ee4f3c366b57c4..8a86c14ded1784dd 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_ieee754_atan2 (double, double); + +-#define SYMBOL_NAME ieee754_atan2 +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_atan2 ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_atan2, + __ieee754_atan2, IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_atan2, __atan2) + +-#define __ieee754_atan2 __ieee754_atan2_sse2 ++# define __ieee754_atan2 __ieee754_atan2_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c +index 406b7ebd44d8eba1..d56329291a204ecf 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_exp (double); + +-#define SYMBOL_NAME ieee754_exp +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_exp ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_exp, __exp) + +-#define __exp __ieee754_exp_sse2 ++# define __exp __ieee754_exp_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c +index 804fd6be850926b3..06fe5028d6e72122 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c +@@ -16,25 +16,28 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_exp2f (float); + +-#define SYMBOL_NAME exp2f +-#include "ifunc-fma.h" ++# define SYMBOL_NAME exp2f ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27); + libm_alias_float_other (__exp2, exp2) +-#else ++# else + libm_alias_float (__exp2, exp2) +-#endif ++# endif + + strong_alias (__exp2f, __ieee754_exp2f) + libm_alias_finite (__exp2f, __exp2f) + +-#define __exp2f __exp2f_sse2 ++# define __exp2f __exp2f_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c +index 4a7e2a5bce697d60..19d767f636126c8d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_expf (float); + +-#define SYMBOL_NAME expf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME expf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__expf, __GI___expf, __redirect_expf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27); + libm_alias_float_other (__exp, exp) +-#else ++# else + libm_alias_float (__exp, exp) +-#endif ++# endif + + strong_alias (__expf, __ieee754_expf) + libm_alias_finite (__expf, __expf) + +-#define __expf __expf_sse2 ++# define __expf __expf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c +index 067fbf58c3f8e0f5..d80c1b1463954af8 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_log (double); + +-#define SYMBOL_NAME ieee754_log +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_log ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_log, __log) + +-#define __log __ieee754_log_sse2 ++# define __log __ieee754_log_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c +index 9c57a2f6ccfcbdfd..9686782c09b1f343 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_log2 (double); + +-#define SYMBOL_NAME log2 +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log2 ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__log2, __GI___log2, __redirect_log2) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29); + libm_alias_double_other (__log2, log2) +-#else ++# else + libm_alias_double (__log2, log2) +-#endif ++# endif + + strong_alias (__log2, __ieee754_log2) + libm_alias_finite (__log2, __log2) + +-#define __log2 __log2_sse2 ++# define __log2 __log2_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c +index 2b45c87f38afea62..8ada46e11e62726c 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_log2f (float); + +-#define SYMBOL_NAME log2f +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log2f ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27); + libm_alias_float_other (__log2, log2) +-#else ++# else + libm_alias_float (__log2, log2) +-#endif ++# endif + + strong_alias (__log2f, __ieee754_log2f) + libm_alias_finite (__log2f, __log2f) + +-#define __log2f __log2f_sse2 ++# define __log2f __log2f_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c +index 97e23c8fea39f37e..a3978d9a8e40c588 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_logf (float); + +-#define SYMBOL_NAME logf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME logf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__logf, __GI___logf, __redirect_logf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27); + libm_alias_float_other (__log, log) +-#else ++# else + libm_alias_float (__log, log) +-#endif ++# endif + + strong_alias (__logf, __ieee754_logf) + libm_alias_finite (__logf, __logf) + +-#define __logf __logf_sse2 ++# define __logf __logf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c +index 42618e7112fac769..f8f17aff9ffb9bf2 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_pow (double, double); + +-#define SYMBOL_NAME ieee754_pow +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME ieee754_pow ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_pow, + __ieee754_pow, IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_pow, __pow) + +-#define __pow __ieee754_pow_sse2 ++# define __pow __ieee754_pow_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c +index 8e6ce13cc1a2470b..8b1a4c7d047115f5 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c +@@ -16,31 +16,34 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + +-#define powf __redirect_powf +-#define __DECL_SIMD___redirect_powf +-#include +-#undef powf ++# define powf __redirect_powf ++# define __DECL_SIMD___redirect_powf ++# include ++# undef powf + +-#define SYMBOL_NAME powf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME powf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__powf, __GI___powf, __redirect_powf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27); + libm_alias_float_other (__pow, pow) +-#else ++# else + libm_alias_float (__pow, pow) +-#endif ++# endif + + strong_alias (__powf, __ieee754_powf) + libm_alias_finite (__powf, __powf) + +-#define __powf __powf_sse2 ++# define __powf __powf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c +index 71bad096a9da60fe..4d2c6ce0060b6d8f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_atan (double); + +-#define SYMBOL_NAME atan +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME atan ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ()); + libm_alias_double (__atan, atan) + +-#define __atan __atan_sse2 ++# define __atan __atan_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S +new file mode 100644 +index 0000000000000000..e6c1106753f29600 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of ceil function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__ceil) ++ vroundsd $10, %xmm0, %xmm0, %xmm0 ++ ret ++END(__ceil) ++ ++libm_alias_double (__ceil, ceil) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S +index 64119011add00832..dba756c38f47b871 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __ceil_sse41 __ceil ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__ceil_sse41) + roundsd $10, %xmm0, %xmm0 + ret + END(__ceil_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c +index cc028addee82f19c..46c8e91e199311db 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define ceil __redirect_ceil +-#define __ceil __redirect___ceil +-#include +-#undef ceil +-#undef __ceil ++# define ceil __redirect_ceil ++# define __ceil __redirect___ceil ++# include ++# undef ceil ++# undef __ceil + +-#define SYMBOL_NAME ceil +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME ceil ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ()); + libm_alias_double (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S +new file mode 100644 +index 0000000000000000..b4d8ac045569900c +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of ceilf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__ceilf) ++ vroundss $10, %xmm0, %xmm0, %xmm0 ++ ret ++END(__ceilf) ++ ++libm_alias_float (__ceil, ceil) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S +index dd9a9f6b71449fc4..9abc87b91afafcbb 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __ceilf_sse41 __ceilf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__ceilf_sse41) + roundss $10, %xmm0, %xmm0 + ret + END(__ceilf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c +index 97a0ca7d19a0d8dd..bb53108f73c00fd0 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define ceilf __redirect_ceilf +-#define __ceilf __redirect___ceilf +-#include +-#undef ceilf +-#undef __ceilf ++# define ceilf __redirect_ceilf ++# define __ceilf __redirect___ceilf ++# include ++# undef ceilf ++# undef __ceilf + +-#define SYMBOL_NAME ceilf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME ceilf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ()); + libm_alias_float (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c +index 2703c576dfa715ce..8a02e04538841602 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern float __redirect_cosf (float); + +-#define SYMBOL_NAME cosf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME cosf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ()); + + libm_alias_float (__cos, cos) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c +index 8a2d69f9b28fb03d..d58ef3d8f5e7933f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c +@@ -16,21 +16,24 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_expm1 (double); + +-#define SYMBOL_NAME expm1 +-#include "ifunc-fma.h" ++# define SYMBOL_NAME expm1 ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ()); + libm_alias_double (__expm1, expm1) + +-#define __expm1 __expm1_sse2 ++# define __expm1 __expm1_sse2 + + /* NB: __expm1 may be expanded to __expm1_sse2 in the following + prototypes. */ + extern long double __expm1l (long double); + extern long double __expm1f128 (long double); + ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S +new file mode 100644 +index 0000000000000000..ff74b5a8bfe69423 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of floor function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__floor) ++ vroundsd $9, %xmm0, %xmm0, %xmm0 ++ ret ++END(__floor) ++ ++libm_alias_double (__floor, floor) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S +index 2f7521f39f5e1c63..c9b9b0639bd24182 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __floor_sse41 __floor ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__floor_sse41) + roundsd $9, %xmm0, %xmm0 + ret + END(__floor_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c +index 8cebd48e1008af6e..2c87dd0056419492 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define floor __redirect_floor +-#define __floor __redirect___floor +-#include +-#undef floor +-#undef __floor ++# define floor __redirect_floor ++# define __floor __redirect___floor ++# include ++# undef floor ++# undef __floor + +-#define SYMBOL_NAME floor +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME floor ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ()); + libm_alias_double (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S +new file mode 100644 +index 0000000000000000..c378baae8e9c7ac7 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of floorf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__floorf) ++ vroundss $9, %xmm0, %xmm0, %xmm0 ++ ret ++END(__floorf) ++ ++libm_alias_float (__floor, floor) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S +index 5f6020d27daceb80..c2216899db7b71a3 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __floorf_sse41 __floorf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__floorf_sse41) + roundss $9, %xmm0, %xmm0 + ret + END(__floorf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c +index a14e18b03c1a1dc9..a277802b6ddc1dfd 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define floorf __redirect_floorf +-#define __floorf __redirect___floorf +-#include +-#undef floorf +-#undef __floorf ++# define floorf __redirect_floorf ++# define __floorf __redirect___floorf ++# include ++# undef floorf ++# undef __floorf + +-#define SYMBOL_NAME floorf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME floorf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); + libm_alias_float (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c +index a8e1a3f21b17236c..3fa1185d81af33e7 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c +@@ -16,14 +16,17 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_log1p (double); + +-#define SYMBOL_NAME log1p +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log1p ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ()); + +-#define __log1p __log1p_sse2 ++# define __log1p __log1p_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S +new file mode 100644 +index 0000000000000000..5bfdf73c28b34350 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of nearbyint function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__nearbyint) ++ vroundsd $0xc, %xmm0, %xmm0, %xmm0 ++ ret ++END(__nearbyint) ++ ++libm_alias_double (__nearbyint, nearbyint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S +index 674f7eb40abb33d9..9d84410a1f347b7a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __nearbyint_sse41 __nearbyint ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__nearbyint_sse41) + roundsd $0xc, %xmm0, %xmm0 + ret + END(__nearbyint_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c +index 693e42dd4edffcc1..057a7ca60f0853cf 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define nearbyint __redirect_nearbyint +-#define __nearbyint __redirect___nearbyint +-#include +-#undef nearbyint +-#undef __nearbyint ++# define nearbyint __redirect_nearbyint ++# define __nearbyint __redirect___nearbyint ++# include ++# undef nearbyint ++# undef __nearbyint + +-#define SYMBOL_NAME nearbyint +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME nearbyint ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, + IFUNC_SELECTOR ()); + libm_alias_double (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S +new file mode 100644 +index 0000000000000000..1dbaed0324daa024 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implmentation of nearbyintf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__nearbyintf) ++ vroundss $0xc, %xmm0, %xmm0, %xmm0 ++ ret ++END(__nearbyintf) ++ ++libm_alias_float (__nearbyint, nearbyint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S +index 5892bd756366f076..3cf35f92d6a0ea9b 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __nearbyintf_sse41 __nearbyintf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__nearbyintf_sse41) + roundss $0xc, %xmm0, %xmm0 + ret + END(__nearbyintf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c +index a0ac009f4bd43baa..41f374ba72902bcf 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define nearbyintf __redirect_nearbyintf +-#define __nearbyintf __redirect___nearbyintf +-#include +-#undef nearbyintf +-#undef __nearbyintf ++# define nearbyintf __redirect_nearbyintf ++# define __nearbyintf __redirect___nearbyintf ++# include ++# undef nearbyintf ++# undef __nearbyintf + +-#define SYMBOL_NAME nearbyintf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME nearbyintf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf, + IFUNC_SELECTOR ()); + libm_alias_float (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S +new file mode 100644 +index 0000000000000000..2b403b331f145221 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of rint function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__rint) ++ vroundsd $4, %xmm0, %xmm0, %xmm0 ++ ret ++END(__rint) ++ ++libm_alias_double (__rint, rint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S +index 405372991bfac3d0..8cd9cf759f3ac70c 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __rint_sse41 __rint ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__rint_sse41) + roundsd $4, %xmm0, %xmm0 + ret + END(__rint_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c +index 754c87e004f4b006..18623b7d99d1a26e 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define rint __redirect_rint +-#define __rint __redirect___rint +-#include +-#undef rint +-#undef __rint ++# define rint __redirect_rint ++# define __rint __redirect___rint ++# include ++# undef rint ++# undef __rint + +-#define SYMBOL_NAME rint +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME rint ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); + libm_alias_double (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S +new file mode 100644 +index 0000000000000000..171c2867f4ab4e3c +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of rintf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__rintf) ++ vroundss $4, %xmm0, %xmm0, %xmm0 ++ ret ++END(__rintf) ++ ++libm_alias_float (__rint, rint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S +index 8ac67ce7673d2aca..fc1e70f0c9a068e4 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __rintf_sse41 __rintf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__rintf_sse41) + roundss $4, %xmm0, %xmm0 + ret + END(__rintf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c +index e9d6b7a5f2080b8b..e275368decba8359 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define rintf __redirect_rintf +-#define __rintf __redirect___rintf +-#include +-#undef rintf +-#undef __rintf ++# define rintf __redirect_rintf ++# define __rintf __redirect___rintf ++# include ++# undef rintf ++# undef __rintf + +-#define SYMBOL_NAME rintf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME rintf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); + libm_alias_float (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S +new file mode 100644 +index 0000000000000000..576790355c6b493f +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of roundeven function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__roundeven) ++ vroundsd $8, %xmm0, %xmm0, %xmm0 ++ ret ++END(__roundeven) ++ ++libm_alias_double (__roundeven, roundeven) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S +index 5ef102336bfa9e2b..f00be56c592a614d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __roundeven_sse41 __roundeven ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__roundeven_sse41) + roundsd $8, %xmm0, %xmm0 + ret + END(__roundeven_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c +index 8737b32e2604290a..139aad088f59c957 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define roundeven __redirect_roundeven +-#define __roundeven __redirect___roundeven +-#include +-#undef roundeven +-#undef __roundeven ++# define roundeven __redirect_roundeven ++# define __roundeven __redirect___roundeven ++# include ++# undef roundeven ++# undef __roundeven + +-#define SYMBOL_NAME roundeven +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME roundeven ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ()); + libm_alias_double (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S +new file mode 100644 +index 0000000000000000..42c359f4cd04fc56 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of roundevenf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__roundevenf) ++ vroundss $8, %xmm0, %xmm0, %xmm0 ++ ret ++END(__roundevenf) ++ ++libm_alias_float (__roundeven, roundeven) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S +index 792c90ba07a3f985..6b148e435316816f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __roundevenf_sse41 __roundevenf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__roundevenf_sse41) + roundss $8, %xmm0, %xmm0 + ret + END(__roundevenf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c +index e96016a4d5ba7b39..2fb090075d328ae8 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define roundevenf __redirect_roundevenf +-#define __roundevenf __redirect___roundevenf +-#include +-#undef roundevenf +-#undef __roundevenf ++# define roundevenf __redirect_roundevenf ++# define __roundevenf __redirect___roundevenf ++# include ++# undef roundevenf ++# undef __roundevenf + +-#define SYMBOL_NAME roundevenf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME roundevenf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ()); + libm_alias_float (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c +index 355cc0092eda46a6..21e77943a3662cd6 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c +@@ -16,24 +16,27 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_sin (double); + extern double __redirect_cos (double); + +-#define SYMBOL_NAME sin +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME sin ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); + libm_alias_double (__sin, sin) + +-#undef SYMBOL_NAME +-#define SYMBOL_NAME cos +-#include "ifunc-avx-fma4.h" ++# undef SYMBOL_NAME ++# define SYMBOL_NAME cos ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); + libm_alias_double (__cos, cos) + +-#define __cos __cos_sse2 +-#define __sin __sin_sse2 ++# define __cos __cos_sse2 ++# define __sin __sin_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c +index 70107e999c39da91..b35757f8de4f9d2d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern void __redirect_sincos (double, double *, double *); + +-#define SYMBOL_NAME sincos +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME sincos ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ()); + libm_alias_double (__sincos, sincos) + +-#define __sincos __sincos_sse2 ++# define __sincos __sincos_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c +index 80bc028451e5153c..0ea9b40e844a2b22 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern void __redirect_sincosf (float, float *, float *); + +-#define SYMBOL_NAME sincosf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME sincosf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ()); + + libm_alias_float (__sincos, sincos) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c +index a32b9e955052c296..c61624e3eed441dd 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern float __redirect_sinf (float); + +-#define SYMBOL_NAME sinf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME sinf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); + + libm_alias_float (__sin, sin) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c +index f9a2474a139b0807..125d992ba1fb01ea 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_tan (double); + +-#define SYMBOL_NAME tan +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME tan ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ()); + libm_alias_double (__tan, tan) + +-#define __tan __tan_sse2 ++# define __tan __tan_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S +new file mode 100644 +index 0000000000000000..b3e87e96068e5404 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of trunc function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__trunc) ++ vroundsd $11, %xmm0, %xmm0, %xmm0 ++ ret ++END(__trunc) ++ ++libm_alias_double (__trunc, trunc) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S +index b496a6ef49ac5d1c..2b79174eeda798f8 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S +@@ -18,8 +18,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __trunc_sse41 __trunc ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__trunc_sse41) + roundsd $11, %xmm0, %xmm0 + ret + END(__trunc_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c +index 9bc9df8744dd56b0..ea89c4f85d9c9dbd 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define trunc __redirect_trunc +-#define __trunc __redirect___trunc +-#include +-#undef trunc +-#undef __trunc ++# define trunc __redirect_trunc ++# define __trunc __redirect___trunc ++# include ++# undef trunc ++# undef __trunc + +-#define SYMBOL_NAME trunc +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME trunc ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); + libm_alias_double (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S +new file mode 100644 +index 0000000000000000..f31ac7d7f7980f2e +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of truncf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__truncf) ++ vroundss $11, %xmm0, %xmm0, %xmm0 ++ ret ++END(__truncf) ++ ++libm_alias_float (__trunc, trunc) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S +index 22e9a8330746da2f..60498b2cb21a97fa 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S +@@ -18,8 +18,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __truncf_sse41 __truncf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__truncf_sse41) + roundss $11, %xmm0, %xmm0 + ret + END(__truncf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c +index dae01d166a0dfa13..92435ce39dbd7cca 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define truncf __redirect_truncf +-#define __truncf __redirect___truncf +-#include +-#undef truncf +-#undef __truncf ++# define truncf __redirect_truncf ++# define __truncf __redirect___truncf ++# include ++# undef truncf ++# undef __truncf + +-#define SYMBOL_NAME truncf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME truncf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); + libm_alias_float (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c +index 27eee98a0a3e16f0..3584187e0e223195 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c +index 9b2b0187116bc48e..414ca3ca3ddf530a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_log.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c +index b50c1988de597731..d5fcc4f871bd7eea 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif diff --git a/glibc-upstream-2.39-31.patch b/glibc-upstream-2.39-31.patch new file mode 100644 index 0000000..bf21203 --- /dev/null +++ b/glibc-upstream-2.39-31.patch @@ -0,0 +1,207 @@ +commit 31da30f23cddd36db29d5b6a1c7619361b271fb4 +Author: Charles Fol +Date: Thu Mar 28 12:25:38 2024 -0300 + + iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961) + + ISO-2022-CN-EXT uses escape sequences to indicate character set changes + (as specified by RFC 1922). While the SOdesignation has the expected + bounds checks, neither SS2designation nor SS3designation have its; + allowing a write overflow of 1, 2, or 3 bytes with fixed values: + '$+I', '$+J', '$+K', '$+L', '$+M', or '$*H'. + + Checked on aarch64-linux-gnu. + + Co-authored-by: Adhemerval Zanella + Reviewed-by: Carlos O'Donell + Tested-by: Carlos O'Donell + + (cherry picked from commit f9dc609e06b1136bb0408be9605ce7973a767ada) + +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index ea019ce5c0e67e98..7196a8744bb66e8c 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ + bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ +- bug-iconv13 bug-iconv14 bug-iconv15 ++ bug-iconv13 bug-iconv14 bug-iconv15 \ ++ tst-iconv-iso-2022-cn-ext + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ ++ $(addprefix $(objpfx),$(modules.so)) + + $(objpfx)iconv-test.out: run-iconv-test.sh \ + $(addprefix $(objpfx), $(gconv-modules)) \ +diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c +index b34c8a36f4564c11..cce29b19692263d6 100644 +--- a/iconvdata/iso-2022-cn-ext.c ++++ b/iconvdata/iso-2022-cn-ext.c +@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); + { \ + const char *escseq; \ + \ ++ if (outptr + 4 > outend) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ + assert (used == CNS11643_2_set); /* XXX */ \ + escseq = "*H"; \ + *outptr++ = ESC; \ +@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); + { \ + const char *escseq; \ + \ ++ if (outptr + 4 > outend) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ + assert ((used >> 5) >= 3 && (used >> 5) <= 7); \ + escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \ + *outptr++ = ESC; \ +diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c +new file mode 100644 +index 0000000000000000..96a8765fd5369681 +--- /dev/null ++++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c +@@ -0,0 +1,128 @@ ++/* Verify ISO-2022-CN-EXT does not write out of the bounds. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* The test sets up a two memory page buffer with the second page marked ++ PROT_NONE to trigger a fault if the conversion writes beyond the exact ++ expected amount. Then we carry out various conversions and precisely ++ place the start of the output buffer in order to trigger a SIGSEGV if the ++ process writes anywhere between 1 and page sized bytes more (only one ++ PROT_NONE page is setup as a canary) than expected. These tests exercise ++ all three of the cases in ISO-2022-CN-EXT where the converter must switch ++ character sets and may run out of buffer space while doing the ++ operation. */ ++ ++static int ++do_test (void) ++{ ++ iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8"); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char *ntf; ++ size_t ntfsize; ++ char *outbufbase; ++ { ++ int pgz = getpagesize (); ++ TEST_VERIFY_EXIT (pgz > 0); ++ ntfsize = 2 * pgz; ++ ++ ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE ++ | MAP_ANONYMOUS, -1); ++ xmprotect (ntf + pgz, pgz, PROT_NONE); ++ ++ outbufbase = ntf + pgz; ++ } ++ ++ /* Check if SOdesignation escape sequence does not trigger an OOB write. */ ++ { ++ char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2"; ++ ++ for (int i = 0; i < 9; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ /* Same as before for SS2designation. */ ++ { ++ char inbuf[] = "㴽 \xe3\xb4\xbd"; ++ ++ for (int i = 0; i < 14; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ /* Same as before for SS3designation. */ ++ { ++ char inbuf[] = "劄 \xe5\x8a\x84"; ++ ++ for (int i = 0; i < 14; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ TEST_VERIFY_EXIT (iconv_close (cd) != -1); ++ ++ xmunmap (ntf, ntfsize); ++ ++ return 0; ++} ++ ++#include diff --git a/glibc.spec b/glibc.spec index 493b171..9687912 100644 --- a/glibc.spec +++ b/glibc.spec @@ -171,7 +171,7 @@ Version: %{glibcversion} # - It allows using the Release number without the %%dist tag in the dependency # generator to make the generated requires interchangeable between Rawhide # and ELN (.elnYY < .fcXX). -%global baserelease 7 +%global baserelease 8 Release: %{baserelease}%{?dist} # Licenses: @@ -302,6 +302,16 @@ Patch41: glibc-upstream-2.39-18.patch Patch42: glibc-upstream-2.39-19.patch Patch43: glibc-upstream-2.39-20.patch Patch44: glibc-upstream-2.39-21.patch +Patch45: glibc-upstream-2.39-22.patch +Patch46: glibc-upstream-2.39-23.patch +Patch47: glibc-upstream-2.39-24.patch +Patch48: glibc-upstream-2.39-25.patch +Patch49: glibc-upstream-2.39-26.patch +Patch50: glibc-upstream-2.39-27.patch +Patch51: glibc-upstream-2.39-28.patch +Patch52: glibc-upstream-2.39-29.patch +Patch53: glibc-upstream-2.39-30.patch +Patch54: glibc-upstream-2.39-31.patch ############################################################################## # Continued list of core "glibc" package information: @@ -2478,6 +2488,20 @@ update_gconv_modules_cache () %endif %changelog +* Thu Apr 18 2024 Florian Weimer - 2.39-8 +- Sync with upstream branch release/2.39/master, + commit 31da30f23cddd36db29d5b6a1c7619361b271fb4: +- iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961) +- x86_64: Exclude SSE, AVX and FMA4 variants in libm multiarch +- Apply the Makefile sorting fix +- powerpc: Fix ld.so address determination for PCREL mode (bug 31640) +- x86-64: Simplify minimum ISA check ifdef conditional with if +- x86-64: Don't use SSE resolvers for ISA level 3 or above +- AArch64: Check kernel version for SVE ifuncs +- aarch64: fix check for SVE support in assembler +- aarch64/fpu: Sync libmvec routines from 2.39 and before with AOR +- i386: Use generic memrchr in libc (bug 31316) + * Thu Apr 04 2024 Arjun Shankar - 2.39-7 - Sync with upstream branch release/2.39/master, commit 5d070d12b3a52bc44dd1b71743abc4b6243862ae: