diff --git a/glibc-upstream-2.39-213.patch b/glibc-upstream-2.39-213.patch new file mode 100644 index 0000000..17ed68b --- /dev/null +++ b/glibc-upstream-2.39-213.patch @@ -0,0 +1,27 @@ +commit b6ea8902a72fb746ae5cd71ddf1172c5ead89972 +Author: Wilco Dijkstra +Date: Fri Jun 27 14:10:55 2025 +0000 + + AArch64: Avoid memset ifunc in cpu-features.c [BZ #33112] + + During early startup memcpy or memset must not be called since many targets + use ifuncs for them which won't be initialized yet. Security hardening may + use -ftrivial-auto-var-init=zero which inserts calls to memset. Redirect + memset to memset_generic by including dl-symbol-redir-ifunc.h in cpu-features.c. + This fixes BZ #33112. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 681a24ae4d0cb8ed92de98b4da660308840b09ba) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index c0b047bc0dbeae42..0ad55a0c7f66618f 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #define DCZID_DZP_MASK (1 << 4) + #define DCZID_BS_MASK (0xf) diff --git a/glibc-upstream-2.39-214.patch b/glibc-upstream-2.39-214.patch new file mode 100644 index 0000000..d4732e8 --- /dev/null +++ b/glibc-upstream-2.39-214.patch @@ -0,0 +1,1638 @@ +commit 3a78a276a37720019ac714e4180910cd5e5d5d58 +Author: Dylan Fleming +Date: Mon May 19 11:36:51 2025 +0000 + + AArch64: Optimize inverse trig functions + + Improve performance of Inverse trig functions by altering how coefficients are + loaded. + + Performance improvement on Neoverse V1: + SVE acos 14% + AdvSIMD acos 6% + + AdvSIMD asin 6% + SVE asin 5% + AdvSIMD asinf 2% + + AdvSIMD atanf 22% + SVE atanf 20% + SVE atan 11% + AdvSIMD atan 5% + + SVE atan2 7% + SVE atan2f 4% + AdvSIMD atan2f 3% + AdvSIMD atan2 2% + + Reviewed-by: Wilco Dijkstra + (cherry picked from commit 1e84509e0041c0a83997aba602a585bb3b8285f0) + +diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c +index 0a86c9823a279766..aaf874f4eab63762 100644 +--- a/sysdeps/aarch64/fpu/acos_advsimd.c ++++ b/sysdeps/aarch64/fpu/acos_advsimd.c +@@ -18,24 +18,23 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f64.h" + + static const struct data + { +- float64x2_t poly[12]; +- float64x2_t pi, pi_over_2; ++ double c1, c3, c5, c7, c9, c11; ++ float64x2_t c0, c2, c4, c6, c8, c10; + uint64x2_t abs_mask; ++ float64x2_t pi, pi_over_2; + } data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ +- .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), +- V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), +- V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), +- V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), +- V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), +- V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, +- .pi = V2 (0x1.921fb54442d18p+1), +- .pi_over_2 = V2 (0x1.921fb54442d18p+0), ++ .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, ++ .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, ++ .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, ++ .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, ++ .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, ++ .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, ++ .pi = V2 (0x1.921fb54442d18p+1), .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .abs_mask = V2 (0x7fffffffffffffff), + }; + +@@ -63,7 +62,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) + + acos(x) ~ pi/2 - (x + x^3 P(x^2)). + +- The largest observed error in this region is 1.18 ulps, ++ The largest observed error in this region is 1.18 ulp: + _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0 + want 0x1.0d54d1985c069p+0. + +@@ -71,9 +70,9 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) + + acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + +- The largest observed error in this region is 1.52 ulps, +- _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1 +- want 0x1.edbbedf8a7d6cp-1. */ ++ The largest observed error in this region is 1.50 ulp: ++ _ZGVnN2v_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 ++ want 0x1.ec1a46aa829p-1. */ + float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -99,13 +98,32 @@ float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x) + float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ ++ float64x2_t z3 = vmulq_f64 (z2, z); + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); +- float64x2_t z16 = vmulq_f64 (z8, z8); +- float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); + +- /* Finalize polynomial: z + z * z2 * P(z2). */ +- p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); ++ /* Order-11 Estrin. */ ++ float64x2_t c13 = vld1q_f64 (&d->c1); ++ float64x2_t c57 = vld1q_f64 (&d->c5); ++ float64x2_t c911 = vld1q_f64 (&d->c9); ++ ++ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); ++ float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); ++ float64x2_t p03 = vfmaq_f64 (p01, z4, p23); ++ ++ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); ++ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); ++ float64x2_t p47 = vfmaq_f64 (p45, z4, p67); ++ ++ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); ++ float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); ++ float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); ++ ++ float64x2_t p411 = vfmaq_f64 (p47, z8, p811); ++ float64x2_t p = vfmaq_f64 (p03, z8, p411); ++ ++ /* Finalize polynomial: z + z3 * P(z2). */ ++ p = vfmaq_f64 (z, z3, p); + + /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 +diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c +index 99dbfac3e6b1e086..870d8062cfd22dee 100644 +--- a/sysdeps/aarch64/fpu/acos_sve.c ++++ b/sysdeps/aarch64/fpu/acos_sve.c +@@ -18,20 +18,21 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f64.h" + + static const struct data + { +- float64_t poly[12]; +- float64_t pi, pi_over_2; ++ float64_t c1, c3, c5, c7, c9, c11; ++ float64_t c0, c2, c4, c6, c8, c10; ++ float64_t pi_over_2; + } data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ +- .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, +- 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, +- 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, +- 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, +- .pi = 0x1.921fb54442d18p+1, ++ .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, ++ .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, ++ .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, ++ .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, ++ .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, ++ .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, + .pi_over_2 = 0x1.921fb54442d18p+0, + }; + +@@ -42,20 +43,21 @@ static const struct data + + acos(x) ~ pi/2 - (x + x^3 P(x^2)). + +- The largest observed error in this region is 1.18 ulps, +- _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0 +- want 0x1.0d4d0f55667f7p+0. ++ The largest observed error in this region is 1.18 ulp: ++ _ZGVsMxv_acos (0x1.fbb7c9079b429p-2) got 0x1.0d51266607582p+0 ++ want 0x1.0d51266607583p+0. + + For |x| in [0.5, 1.0], use same approximation with a change of variable + + acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z). + +- The largest observed error in this region is 1.52 ulps, +- _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1 +- want 0x1.ed82df4243f0bp-1. */ ++ The largest observed error in this region is 1.50 ulp: ++ _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1 ++ want 0x1.ec1a46aa829p-1. */ + svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) + { + const struct data *d = ptr_barrier (&data); ++ svbool_t ptrue = svptrue_b64 (); + + svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); + svfloat64_t ax = svabs_x (pg, x); +@@ -70,24 +72,41 @@ svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg) + svfloat64_t z = svsqrt_m (ax, a_gt_half, z2); + + /* Use a single polynomial approximation P for both intervals. */ +- svfloat64_t z4 = svmul_x (pg, z2, z2); +- svfloat64_t z8 = svmul_x (pg, z4, z4); +- svfloat64_t z16 = svmul_x (pg, z8, z8); +- svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); ++ svfloat64_t z3 = svmul_x (ptrue, z2, z); ++ svfloat64_t z4 = svmul_x (ptrue, z2, z2); ++ svfloat64_t z8 = svmul_x (ptrue, z4, z4); ++ ++ svfloat64_t c13 = svld1rq (ptrue, &d->c1); ++ svfloat64_t c57 = svld1rq (ptrue, &d->c5); ++ svfloat64_t c911 = svld1rq (ptrue, &d->c9); ++ ++ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); ++ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); ++ svfloat64_t p03 = svmla_x (pg, p01, z4, p23); ++ ++ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); ++ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); ++ svfloat64_t p47 = svmla_x (pg, p45, z4, p67); ++ ++ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); ++ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); ++ svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); ++ ++ svfloat64_t p411 = svmla_x (pg, p47, z8, p811); ++ svfloat64_t p = svmad_x (pg, p411, z8, p03); + + /* Finalize polynomial: z + z * z2 * P(z2). */ +- p = svmla_x (pg, z, svmul_x (pg, z, z2), p); ++ p = svmad_x (pg, p, z3, z); + + /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5 + = 2 Q(|x|) , for 0.5 < x < 1.0 + = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */ +- svfloat64_t y +- = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign)); +- +- svbool_t is_neg = svcmplt (pg, x, 0.0); +- svfloat64_t off = svdup_f64_z (is_neg, d->pi); +- svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0)); +- svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2)); +- +- return svmla_x (pg, add, mul, y); ++ svfloat64_t mul = svreinterpret_f64 ( ++ svlsl_m (a_gt_half, svreinterpret_u64 (sv_f64 (1.0)), 10)); ++ mul = svreinterpret_f64 (sveor_x (ptrue, svreinterpret_u64 (mul), sign)); ++ svfloat64_t add = svreinterpret_f64 ( ++ svorr_x (ptrue, sign, svreinterpret_u64 (sv_f64 (d->pi_over_2)))); ++ add = svsub_m (a_gt_half, sv_f64 (d->pi_over_2), add); ++ ++ return svmsb_x (pg, p, mul, add); + } +diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c +index 2de6eff40782bc79..f4884227a5c45fe5 100644 +--- a/sysdeps/aarch64/fpu/asin_advsimd.c ++++ b/sysdeps/aarch64/fpu/asin_advsimd.c +@@ -18,24 +18,23 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f64.h" + + static const struct data + { +- float64x2_t poly[12]; ++ float64x2_t c0, c2, c4, c6, c8, c10; + float64x2_t pi_over_2; + uint64x2_t abs_mask; ++ double c1, c3, c5, c7, c9, c11; + } data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ +- .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4), +- V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6), +- V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6), +- V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7), +- V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6), +- V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), }, +- .pi_over_2 = V2 (0x1.921fb54442d18p+0), +- .abs_mask = V2 (0x7fffffffffffffff), ++ .c0 = V2 (0x1.555555555554ep-3), .c1 = 0x1.3333333337233p-4, ++ .c2 = V2 (0x1.6db6db67f6d9fp-5), .c3 = 0x1.f1c71fbd29fbbp-6, ++ .c4 = V2 (0x1.6e8b264d467d6p-6), .c5 = 0x1.1c5997c357e9dp-6, ++ .c6 = V2 (0x1.c86a22cd9389dp-7), .c7 = 0x1.856073c22ebbep-7, ++ .c8 = V2 (0x1.fd1151acb6bedp-8), .c9 = 0x1.087182f799c1dp-6, ++ .c10 = V2 (-0x1.6602748120927p-7), .c11 = 0x1.cfa0dd1f9478p-6, ++ .pi_over_2 = V2 (0x1.921fb54442d18p+0), .abs_mask = V2 (0x7fffffffffffffff), + }; + + #define AllMask v_u64 (0xffffffffffffffff) +@@ -68,8 +67,8 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t special) + asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + + The largest observed error in this region is 2.69 ulps, +- _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 +- want 0x1.110d7e85fdd53p-1. */ ++ _ZGVnN2v_asin (0x1.044e8cefee301p-1) got 0x1.1111dd54ddf96p-1 ++ want 0x1.1111dd54ddf99p-1. */ + float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -86,7 +85,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) + return special_case (x, x, AllMask); + #endif + +- uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5)); ++ uint64x2_t a_lt_half = vcaltq_f64 (x, v_f64 (0.5)); + + /* Evaluate polynomial Q(x) = y + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 +@@ -99,7 +98,26 @@ float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x) + float64x2_t z4 = vmulq_f64 (z2, z2); + float64x2_t z8 = vmulq_f64 (z4, z4); + float64x2_t z16 = vmulq_f64 (z8, z8); +- float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly); ++ ++ /* order-11 estrin. */ ++ float64x2_t c13 = vld1q_f64 (&d->c1); ++ float64x2_t c57 = vld1q_f64 (&d->c5); ++ float64x2_t c911 = vld1q_f64 (&d->c9); ++ ++ float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); ++ float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); ++ float64x2_t p03 = vfmaq_f64 (p01, z4, p23); ++ ++ float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); ++ float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); ++ float64x2_t p47 = vfmaq_f64 (p45, z4, p67); ++ ++ float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); ++ float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); ++ float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); ++ ++ float64x2_t p07 = vfmaq_f64 (p03, z8, p47); ++ float64x2_t p = vfmaq_f64 (p07, z16, p811); + + /* Finalize polynomial: z + z * z2 * P(z2). */ + p = vfmaq_f64 (z, vmulq_f64 (z, z2), p); +diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c +index 9daa382e34510201..acbf1b3bdd2c5bb6 100644 +--- a/sysdeps/aarch64/fpu/asin_sve.c ++++ b/sysdeps/aarch64/fpu/asin_sve.c +@@ -18,45 +18,43 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f64.h" + + static const struct data + { +- float64_t poly[12]; +- float64_t pi_over_2f; ++ float64_t c1, c3, c5, c7, c9, c11; ++ float64_t c0, c2, c4, c6, c8, c10; ++ float64_t pi_over_2; + } data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) + on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */ +- .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, +- 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6, +- 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, +- 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, +- 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6, +- -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, }, +- .pi_over_2f = 0x1.921fb54442d18p+0, ++ .c0 = 0x1.555555555554ep-3, .c1 = 0x1.3333333337233p-4, ++ .c2 = 0x1.6db6db67f6d9fp-5, .c3 = 0x1.f1c71fbd29fbbp-6, ++ .c4 = 0x1.6e8b264d467d6p-6, .c5 = 0x1.1c5997c357e9dp-6, ++ .c6 = 0x1.c86a22cd9389dp-7, .c7 = 0x1.856073c22ebbep-7, ++ .c8 = 0x1.fd1151acb6bedp-8, .c9 = 0x1.087182f799c1dp-6, ++ .c10 = -0x1.6602748120927p-7, .c11 = 0x1.cfa0dd1f9478p-6, ++ .pi_over_2 = 0x1.921fb54442d18p+0, + }; + +-#define P(i) sv_f64 (d->poly[i]) +- + /* Double-precision SVE implementation of vector asin(x). + + For |x| in [0, 0.5], use an order 11 polynomial P such that the final + approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). + +- The largest observed error in this region is 0.52 ulps, +- _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2 +- want 0x1.ec13757305f26p-2. +- +- For |x| in [0.5, 1.0], use same approximation with a change of variable ++ The largest observed error in this region is 0.98 ulp: ++ _ZGVsMxv_asin (0x1.d98f6a748ed8ap-2) got 0x1.ec4eb661a73d3p-2 ++ want 0x1.ec4eb661a73d2p-2. + +- asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). ++ For |x| in [0.5, 1.0], use same approximation with a change of variable: ++ asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z). + +- The largest observed error in this region is 2.69 ulps, +- _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1 +- want 0x1.110d7e85fdd53p-1. */ ++ The largest observed error in this region is 2.66 ulp: ++ _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1 ++ want 0x1.10b9586f087abp-1. */ + svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) + { + const struct data *d = ptr_barrier (&data); ++ svbool_t ptrue = svptrue_b64 (); + + svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000); + svfloat64_t ax = svabs_x (pg, x); +@@ -70,17 +68,37 @@ svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg) + svfloat64_t z = svsqrt_m (ax, a_ge_half, z2); + + /* Use a single polynomial approximation P for both intervals. */ ++ svfloat64_t z3 = svmul_x (pg, z2, z); + svfloat64_t z4 = svmul_x (pg, z2, z2); + svfloat64_t z8 = svmul_x (pg, z4, z4); +- svfloat64_t z16 = svmul_x (pg, z8, z8); +- svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly); ++ ++ svfloat64_t c13 = svld1rq (ptrue, &d->c1); ++ svfloat64_t c57 = svld1rq (ptrue, &d->c5); ++ svfloat64_t c911 = svld1rq (ptrue, &d->c9); ++ ++ /* Order-11 Estrin scheme. */ ++ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); ++ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); ++ svfloat64_t p03 = svmla_x (pg, p01, z4, p23); ++ ++ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); ++ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); ++ svfloat64_t p47 = svmla_x (pg, p45, z4, p67); ++ ++ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); ++ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); ++ svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); ++ ++ svfloat64_t p411 = svmla_x (pg, p47, z8, p811); ++ svfloat64_t p = svmla_x (pg, p03, z8, p411); ++ + /* Finalize polynomial: z + z * z2 * P(z2). */ +- p = svmla_x (pg, z, svmul_x (pg, z, z2), p); ++ p = svmla_x (pg, z, z3, p); + +- /* asin(|x|) = Q(|x|) , for |x| < 0.5 +- = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ +- svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f); ++ /* asin(|x|) = Q(|x|), for |x| < 0.5 ++ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ ++ svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2); + +- /* Copy sign. */ ++ /* Reinsert the sign from the argument. */ + return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)); + } +diff --git a/sysdeps/aarch64/fpu/asinf_advsimd.c b/sysdeps/aarch64/fpu/asinf_advsimd.c +index 59d870ca3693abac..88437f85abec4729 100644 +--- a/sysdeps/aarch64/fpu/asinf_advsimd.c ++++ b/sysdeps/aarch64/fpu/asinf_advsimd.c +@@ -18,22 +18,21 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f32.h" + + static const struct data + { +- float32x4_t poly[5]; ++ float32x4_t c0, c2, c4; ++ float c1, c3; + float32x4_t pi_over_2f; + } data = { + /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on + [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */ +- .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5), +- V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) }, +- .pi_over_2f = V4 (0x1.921fb6p+0f), ++ .c0 = V4 (0x1.55555ep-3f), .c1 = 0x1.33261ap-4f, ++ .c2 = V4 (0x1.70d7dcp-5f), .c3 = 0x1.b059dp-6f, ++ .c4 = V4 (0x1.3af7d8p-5f), .pi_over_2f = V4 (0x1.921fb6p+0f), + }; + + #define AbsMask 0x7fffffff +-#define Half 0x3f000000 + #define One 0x3f800000 + #define Small 0x39800000 /* 2^-12. */ + +@@ -47,11 +46,8 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) + + /* Single-precision implementation of vector asin(x). + +- For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct +- rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the +- following approximation. + +- For |x| in [Small, 0.5], use order 4 polynomial P such that the final ++ For |x| <0.5, use order 4 polynomial P such that the final + approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2). + + The largest observed error in this region is 0.83 ulps, +@@ -80,24 +76,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (asin) (float32x4_t x) + #endif + + float32x4_t ax = vreinterpretq_f32_u32 (ia); +- uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half)); ++ uint32x4_t a_lt_half = vcaltq_f32 (x, v_f32 (0.5f)); + + /* Evaluate polynomial Q(x) = y + y * z * P(z) with + z = x ^ 2 and y = |x| , if |x| < 0.5 + z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */ + float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x), +- vfmsq_n_f32 (v_f32 (0.5), ax, 0.5)); ++ vfmsq_n_f32 (v_f32 (0.5f), ax, 0.5f)); + float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2)); + + /* Use a single polynomial approximation P for both intervals. */ +- float32x4_t p = v_horner_4_f32 (z2, d->poly); ++ ++ /* PW Horner 3 evaluation scheme. */ ++ float32x4_t z4 = vmulq_f32 (z2, z2); ++ float32x4_t c13 = vld1q_f32 (&d->c1); ++ float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c13, 0); ++ float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c13, 1); ++ float32x4_t p = vfmaq_f32 (p23, d->c4, z4); ++ p = vfmaq_f32 (p01, p, z4); + /* Finalize polynomial: z + z * z2 * P(z2). */ + p = vfmaq_f32 (z, vmulq_f32 (z, z2), p); + + /* asin(|x|) = Q(|x|) , for |x| < 0.5 + = pi/2 - 2 Q(|x|), for |x| >= 0.5. */ + float32x4_t y +- = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0)); ++ = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0f)); + + /* Copy sign. */ + return vbslq_f32 (v_u32 (AbsMask), y, x); +diff --git a/sysdeps/aarch64/fpu/atan2_advsimd.c b/sysdeps/aarch64/fpu/atan2_advsimd.c +index 5df4b99ff4277c6a..36be4ce83ac15727 100644 +--- a/sysdeps/aarch64/fpu/atan2_advsimd.c ++++ b/sysdeps/aarch64/fpu/atan2_advsimd.c +@@ -18,40 +18,38 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f64.h" + + static const struct data + { ++ double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + float64x2_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; + float64x2_t pi_over_2; +- double c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; +- uint64x2_t zeroinfnan, minustwo; ++ uint64x2_t zeroinfnan; + } data = { +- /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on +- [2**-1022, 1.0]. */ +- .c0 = V2 (-0x1.5555555555555p-2), +- .c1 = 0x1.99999999996c1p-3, +- .c2 = V2 (-0x1.2492492478f88p-3), +- .c3 = 0x1.c71c71bc3951cp-4, +- .c4 = V2 (-0x1.745d160a7e368p-4), +- .c5 = 0x1.3b139b6a88ba1p-4, +- .c6 = V2 (-0x1.11100ee084227p-4), +- .c7 = 0x1.e1d0f9696f63bp-5, +- .c8 = V2 (-0x1.aebfe7b418581p-5), +- .c9 = 0x1.842dbe9b0d916p-5, +- .c10 = V2 (-0x1.5d30140ae5e99p-5), +- .c11 = 0x1.338e31eb2fbbcp-5, +- .c12 = V2 (-0x1.00e6eece7de8p-5), +- .c13 = 0x1.860897b29e5efp-6, +- .c14 = V2 (-0x1.0051381722a59p-6), +- .c15 = 0x1.14e9dc19a4a4ep-7, +- .c16 = V2 (-0x1.d0062b42fe3bfp-9), +- .c17 = 0x1.17739e210171ap-10, +- .c18 = V2 (-0x1.ab24da7be7402p-13), +- .c19 = 0x1.358851160a528p-16, ++ /* Coefficients of polynomial P such that ++ atan(x)~x+x*P(x^2) on [2^-1022, 1.0]. */ ++ .c0 = V2 (-0x1.555555555552ap-2), ++ .c1 = 0x1.9999999995aebp-3, ++ .c2 = V2 (-0x1.24924923923f6p-3), ++ .c3 = 0x1.c71c7184288a2p-4, ++ .c4 = V2 (-0x1.745d11fb3d32bp-4), ++ .c5 = 0x1.3b136a18051b9p-4, ++ .c6 = V2 (-0x1.110e6d985f496p-4), ++ .c7 = 0x1.e1bcf7f08801dp-5, ++ .c8 = V2 (-0x1.ae644e28058c3p-5), ++ .c9 = 0x1.82eeb1fed85c6p-5, ++ .c10 = V2 (-0x1.59d7f901566cbp-5), ++ .c11 = 0x1.2c982855ab069p-5, ++ .c12 = V2 (-0x1.eb49592998177p-6), ++ .c13 = 0x1.69d8b396e3d38p-6, ++ .c14 = V2 (-0x1.ca980345c4204p-7), ++ .c15 = 0x1.dc050eafde0b3p-8, ++ .c16 = V2 (-0x1.7ea70755b8eccp-9), ++ .c17 = 0x1.ba3da3de903e8p-11, ++ .c18 = V2 (-0x1.44a4b059b6f67p-13), ++ .c19 = 0x1.c4a45029e5a91p-17, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), + .zeroinfnan = V2 (2 * 0x7ff0000000000000ul - 1), +- .minustwo = V2 (0xc000000000000000), + }; + + #define SignMask v_u64 (0x8000000000000000) +@@ -76,10 +74,9 @@ zeroinfnan (uint64x2_t i, const struct data *d) + } + + /* Fast implementation of vector atan2. +- Maximum observed error is 2.8 ulps: +- _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5) +- got 0x1.92d628ab678ccp-1 +- want 0x1.92d628ab678cfp-1. */ ++ Maximum observed error is 1.97 ulps: ++ _ZGVnN2vv_atan2 (0x1.42337dba73768p+5, 0x1.422d748cd3e29p+5) ++ got 0x1.9224810264efcp-1 want 0x1.9224810264efep-1. */ + float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -100,26 +97,29 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) + uint64x2_t pred_xlt0 = vcltzq_f64 (x); + uint64x2_t pred_aygtax = vcagtq_f64 (y, x); + +- /* Set up z for call to atan. */ +- float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); +- float64x2_t q = vbslq_f64 (pred_aygtax, ay, ax); +- float64x2_t z = vdivq_f64 (n, q); +- +- /* Work out the correct shift. */ +- float64x2_t shift +- = vreinterpretq_f64_u64 (vandq_u64 (pred_xlt0, d->minustwo)); +- shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift); +- shift = vmulq_f64 (shift, d->pi_over_2); +- +- /* Calculate the polynomial approximation. +- Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of +- full scheme to avoid underflow in x^16. +- The order 19 polynomial P approximates +- (atan(sqrt(x))-sqrt(x))/x^(3/2). */ ++ /* Set up z for evaluation of atan. */ ++ float64x2_t num = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay); ++ float64x2_t den = vbslq_f64 (pred_aygtax, ay, ax); ++ float64x2_t z = vdivq_f64 (num, den); ++ ++ /* Work out the correct shift for atan2: ++ Multiplication by pi is done later. ++ -pi when x < 0 and ax < ay ++ -pi/2 when x < 0 and ax > ay ++ 0 when x >= 0 and ax < ay ++ pi/2 when x >= 0 and ax > ay. */ ++ float64x2_t shift = vreinterpretq_f64_u64 ( ++ vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0)))); ++ float64x2_t shift2 = vreinterpretq_f64_u64 ( ++ vandq_u64 (pred_aygtax, vreinterpretq_u64_f64 (v_f64 (1.0)))); ++ shift = vaddq_f64 (shift, shift2); ++ ++ /* Calculate the polynomial approximation. */ + float64x2_t z2 = vmulq_f64 (z, z); +- float64x2_t x2 = vmulq_f64 (z2, z2); +- float64x2_t x4 = vmulq_f64 (x2, x2); +- float64x2_t x8 = vmulq_f64 (x4, x4); ++ float64x2_t z3 = vmulq_f64 (z2, z); ++ float64x2_t z4 = vmulq_f64 (z2, z2); ++ float64x2_t z8 = vmulq_f64 (z4, z4); ++ float64x2_t z16 = vmulq_f64 (z8, z8); + + float64x2_t c13 = vld1q_f64 (&d->c1); + float64x2_t c57 = vld1q_f64 (&d->c5); +@@ -127,45 +127,43 @@ float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x) + float64x2_t c1315 = vld1q_f64 (&d->c13); + float64x2_t c1719 = vld1q_f64 (&d->c17); + +- /* estrin_7. */ ++ /* Order-7 Estrin. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); +- float64x2_t p03 = vfmaq_f64 (p01, x2, p23); ++ float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); +- float64x2_t p47 = vfmaq_f64 (p45, x2, p67); ++ float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + +- float64x2_t p07 = vfmaq_f64 (p03, x4, p47); ++ float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + +- /* estrin_11. */ ++ /* Order-11 Estrin. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); +- float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); ++ float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); +- float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); ++ float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); +- float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); ++ float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); + +- float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); +- float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); ++ float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); ++ float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); + +- float64x2_t ret = vfmaq_f64 (p07, p819, x8); ++ float64x2_t poly = vfmaq_f64 (p07, p819, z16); + + /* Finalize. y = shift + z + z^3 * P(z^2). */ +- ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z)); +- ret = vaddq_f64 (ret, shift); ++ float64x2_t ret = vfmaq_f64 (z, shift, d->pi_over_2); ++ ret = vfmaq_f64 (ret, z3, poly); + + if (__glibc_unlikely (v_any_u64 (special_cases))) + return special_case (y, x, ret, sign_xy, special_cases); + + /* Account for the sign of x and y. */ +- ret = vreinterpretq_f64_u64 ( ++ return vreinterpretq_f64_u64 ( + veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy)); +- +- return ret; + } +diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c +index 04fa71fa37c3de29..7af4931cdb89a1c5 100644 +--- a/sysdeps/aarch64/fpu/atan2_sve.c ++++ b/sysdeps/aarch64/fpu/atan2_sve.c +@@ -18,25 +18,25 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f64.h" + + static const struct data + { +- float64_t poly[20]; +- float64_t pi_over_2; ++ float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; ++ float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-1022, 1.0]. */ +- .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, +- 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, +- -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, +- 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, +- -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, +- 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, +- -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, +- .pi_over_2 = 0x1.921fb54442d18p+0, ++ .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, ++ .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, ++ .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, ++ .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, ++ .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, ++ .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, ++ .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, ++ .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, ++ .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, ++ .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, + }; +- + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ + static svfloat64_t NOINLINE + special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, +@@ -55,15 +55,17 @@ zeroinfnan (svuint64_t i, const svbool_t pg) + } + + /* Fast implementation of SVE atan2. Errors are greatest when y and +- x are reasonably close together. The greatest observed error is 2.28 ULP: +- _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732) +- got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */ +-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) ++ x are reasonably close together. The greatest observed error is 1.94 ULP: ++ _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5) ++ got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1. */ ++svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, ++ const svbool_t pg) + { +- const struct data *data_ptr = ptr_barrier (&data); ++ const struct data *d = ptr_barrier (&data); + + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t iy = svreinterpret_u64 (y); ++ svbool_t ptrue = svptrue_b64 (); + + svbool_t cmp_x = zeroinfnan (ix, pg); + svbool_t cmp_y = zeroinfnan (iy, pg); +@@ -80,32 +82,67 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + +- /* Set up z for call to atan. */ +- svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); +- svfloat64_t d = svsel (pred_aygtax, ay, ax); +- svfloat64_t z = svdiv_x (pg, n, d); +- +- /* Work out the correct shift. */ ++ /* Set up z for evaluation of atan. */ ++ svfloat64_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); ++ svfloat64_t den = svsel (pred_aygtax, ay, ax); ++ svfloat64_t z = svdiv_x (pg, num, den); ++ ++ /* Work out the correct shift for atan2: ++ Multiplication by pi is done later. ++ -pi when x < 0 and ax < ay ++ -pi/2 when x < 0 and ax > ay ++ 0 when x >= 0 and ax < ay ++ pi/2 when x >= 0 and ax > ay. */ + svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); ++ svfloat64_t shift_mul = svreinterpret_f64 ( ++ svorr_x (pg, sign_x, svreinterpret_u64 (sv_f64 (0x1.921fb54442d18p+0)))); + shift = svsel (pred_aygtax, sv_f64 (1.0), shift); +- shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); +- shift = svmul_x (pg, shift, data_ptr->pi_over_2); ++ shift = svmla_x (pg, z, shift, shift_mul); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ + svfloat64_t z2 = svmul_x (pg, z, z); +- svfloat64_t x2 = svmul_x (pg, z2, z2); +- svfloat64_t x4 = svmul_x (pg, x2, x2); +- svfloat64_t x8 = svmul_x (pg, x4, x4); ++ svfloat64_t z3 = svmul_x (pg, z2, z); ++ svfloat64_t z4 = svmul_x (pg, z2, z2); ++ svfloat64_t z8 = svmul_x (pg, z4, z4); ++ svfloat64_t z16 = svmul_x (pg, z8, z8); + +- svfloat64_t ret = svmla_x ( +- pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly), +- sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8); ++ /* Order-7 Estrin. */ ++ svfloat64_t c13 = svld1rq (ptrue, &d->c1); ++ svfloat64_t c57 = svld1rq (ptrue, &d->c5); + +- /* y = shift + z + z^3 * P(z^2). */ +- svfloat64_t z3 = svmul_x (pg, z2, z); +- ret = svmla_x (pg, z, z3, ret); ++ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); ++ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); ++ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); ++ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); ++ ++ svfloat64_t p03 = svmla_x (pg, p01, z4, p23); ++ svfloat64_t p47 = svmla_x (pg, p45, z4, p67); ++ svfloat64_t p07 = svmla_x (pg, p03, z8, p47); ++ ++ /* Order-11 Estrin. */ ++ svfloat64_t c911 = svld1rq (ptrue, &d->c9); ++ svfloat64_t c1315 = svld1rq (ptrue, &d->c13); ++ svfloat64_t c1719 = svld1rq (ptrue, &d->c17); + +- ret = svadd_m (pg, ret, shift); ++ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); ++ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); ++ svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); ++ ++ svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); ++ svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); ++ svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); ++ ++ svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); ++ svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); ++ svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); ++ ++ svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); ++ svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); ++ ++ svfloat64_t poly = svmla_x (pg, p07, z16, p819); ++ ++ /* y = shift + z + z^3 * P(z^2). */ ++ svfloat64_t ret = svmla_x (pg, shift, z3, poly); + + /* Account for the sign of x and y. */ + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) +diff --git a/sysdeps/aarch64/fpu/atan2f_advsimd.c b/sysdeps/aarch64/fpu/atan2f_advsimd.c +index 88daacd76cdd3998..23a825e63bfb53b2 100644 +--- a/sysdeps/aarch64/fpu/atan2f_advsimd.c ++++ b/sysdeps/aarch64/fpu/atan2f_advsimd.c +@@ -18,22 +18,22 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f32.h" + + static const struct data + { +- float32x4_t c0, pi_over_2, c4, c6, c2; ++ float32x4_t c0, c4, c6, c2; + float c1, c3, c5, c7; + uint32x4_t comp_const; ++ float32x4_t pi; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-128, 1.0]. + Generated using fpminimax between FLT_MIN and 1. */ +- .c0 = V4 (-0x1.55555p-2f), .c1 = 0x1.99935ep-3f, +- .c2 = V4 (-0x1.24051ep-3f), .c3 = 0x1.bd7368p-4f, +- .c4 = V4 (-0x1.491f0ep-4f), .c5 = 0x1.93a2c0p-5f, +- .c6 = V4 (-0x1.4c3c60p-6f), .c7 = 0x1.01fd88p-8f, +- .pi_over_2 = V4 (0x1.921fb6p+0f), .comp_const = V4 (2 * 0x7f800000lu - 1), ++ .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, ++ .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, ++ .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, ++ .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, ++ .pi = V4 (0x1.921fb6p+1f), .comp_const = V4 (2 * 0x7f800000lu - 1), + }; + + #define SignMask v_u32 (0x80000000) +@@ -54,13 +54,13 @@ static inline uint32x4_t + zeroinfnan (uint32x4_t i, const struct data *d) + { + /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */ +- return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)), d->comp_const); ++ return vcgeq_u32 (vsubq_u32 (vshlq_n_u32 (i, 1), v_u32 (1)), d->comp_const); + } + + /* Fast implementation of vector atan2f. Maximum observed error is +- 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: +- _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 +- want 0x1.967f00p-1. */ ++ 2.13 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]: ++ _ZGVnN4vv_atan2f (0x1.14a9d4p-87, 0x1.0eb886p-87) got 0x1.97aea2p-1 ++ want 0x1.97ae9ep-1. */ + float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -81,28 +81,31 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) + uint32x4_t pred_xlt0 = vcltzq_f32 (x); + uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax); + +- /* Set up z for call to atanf. */ +- float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); +- float32x4_t q = vbslq_f32 (pred_aygtax, ay, ax); +- float32x4_t z = vdivq_f32 (n, q); +- +- /* Work out the correct shift. */ ++ /* Set up z for evaluation of atanf. */ ++ float32x4_t num = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay); ++ float32x4_t den = vbslq_f32 (pred_aygtax, ay, ax); ++ float32x4_t z = vdivq_f32 (num, den); ++ ++ /* Work out the correct shift for atan2: ++ Multiplication by pi is done later. ++ -pi when x < 0 and ax < ay ++ -pi/2 when x < 0 and ax > ay ++ 0 when x >= 0 and ax < ay ++ pi/2 when x >= 0 and ax > ay. */ + float32x4_t shift = vreinterpretq_f32_u32 ( +- vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f)))); +- shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift); +- shift = vmulq_f32 (shift, d->pi_over_2); +- +- /* Calculate the polynomial approximation. +- Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, +- a standard implementation using z8 creates spurious underflow +- in the very last fma (when z^8 is small enough). +- Therefore, we split the last fma into a mul and an fma. +- Horner and single-level Estrin have higher errors that exceed +- threshold. */ ++ vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-1.0f)))); ++ float32x4_t shift2 = vreinterpretq_f32_u32 ( ++ vandq_u32 (pred_aygtax, vreinterpretq_u32_f32 (v_f32 (0.5f)))); ++ shift = vaddq_f32 (shift, shift2); ++ ++ /* Calculate the polynomial approximation. */ + float32x4_t z2 = vmulq_f32 (z, z); ++ float32x4_t z3 = vmulq_f32 (z2, z); + float32x4_t z4 = vmulq_f32 (z2, z2); ++ float32x4_t z8 = vmulq_f32 (z4, z4); + + float32x4_t c1357 = vld1q_f32 (&d->c1); ++ + float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, c1357, 0); + float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, c1357, 1); + float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, c1357, 2); +@@ -110,10 +113,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x) + float32x4_t p03 = vfmaq_f32 (p01, z4, p23); + float32x4_t p47 = vfmaq_f32 (p45, z4, p67); + +- float32x4_t ret = vfmaq_f32 (p03, z4, vmulq_f32 (z4, p47)); ++ float32x4_t poly = vfmaq_f32 (p03, z8, p47); + + /* y = shift + z * P(z^2). */ +- ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift); ++ float32x4_t ret = vfmaq_f32 (z, shift, d->pi); ++ ret = vfmaq_f32 (ret, z3, poly); + + if (__glibc_unlikely (v_any_u32 (special_cases))) + { +diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c +index 9ea197147ccca0ac..f7f6d40ca668192f 100644 +--- a/sysdeps/aarch64/fpu/atan2f_sve.c ++++ b/sysdeps/aarch64/fpu/atan2f_sve.c +@@ -18,18 +18,18 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f32.h" + + static const struct data + { +- float32_t poly[8]; ++ float32_t c0, c2, c4, c6; ++ float32_t c1, c3, c5, c7; + float32_t pi_over_2; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-128, 1.0]. */ +- .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, +- -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, +- .pi_over_2 = 0x1.921fb6p+0f, ++ .c0 = -0x1.5554dcp-2, .c1 = 0x1.9978ecp-3, .c2 = -0x1.230a94p-3, ++ .c3 = 0x1.b4debp-4, .c4 = -0x1.3550dap-4, .c5 = 0x1.61eebp-5, ++ .c6 = -0x1.0c17d4p-6, .c7 = 0x1.7ea694p-9, .pi_over_2 = 0x1.921fb6p+0f, + }; + + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ +@@ -51,12 +51,14 @@ zeroinfnan (svuint32_t i, const svbool_t pg) + + /* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 * + P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum +- observed error is 2.95 ULP: +- _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1 +- want 0x1.967f00p-1. */ +-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) ++ observed error is 2.21 ULP: ++ _ZGVnN4vv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1 ++ want 0x1.95ed36p-1. */ ++svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, ++ const svbool_t pg) + { +- const struct data *data_ptr = ptr_barrier (&data); ++ const struct data *d = ptr_barrier (&data); ++ svbool_t ptrue = svptrue_b32 (); + + svuint32_t ix = svreinterpret_u32 (x); + svuint32_t iy = svreinterpret_u32 (y); +@@ -76,29 +78,42 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + +- /* Set up z for call to atan. */ +- svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay); +- svfloat32_t d = svsel (pred_aygtax, ay, ax); +- svfloat32_t z = svdiv_x (pg, n, d); +- +- /* Work out the correct shift. */ ++ /* Set up z for evaluation of atanf. */ ++ svfloat32_t num = svsel (pred_aygtax, svneg_x (pg, ax), ay); ++ svfloat32_t den = svsel (pred_aygtax, ay, ax); ++ svfloat32_t z = svdiv_x (ptrue, num, den); ++ ++ /* Work out the correct shift for atan2: ++ Multiplication by pi is done later. ++ -pi when x < 0 and ax < ay ++ -pi/2 when x < 0 and ax > ay ++ 0 when x >= 0 and ax < ay ++ pi/2 when x >= 0 and ax > ay. */ + svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); + shift = svsel (pred_aygtax, sv_f32 (1.0), shift); + shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); +- shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); + + /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ +- svfloat32_t z2 = svmul_x (pg, z, z); ++ svfloat32_t z2 = svmul_x (ptrue, z, z); ++ svfloat32_t z3 = svmul_x (pg, z2, z); + svfloat32_t z4 = svmul_x (pg, z2, z2); + svfloat32_t z8 = svmul_x (pg, z4, z4); + +- svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly); ++ svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); + +- /* ret = shift + z + z^3 * P(z^2). */ +- svfloat32_t z3 = svmul_x (pg, z2, z); +- ret = svmla_x (pg, z, z3, ret); ++ svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); ++ svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); ++ svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); ++ svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); + +- ret = svadd_m (pg, ret, shift); ++ svfloat32_t p03 = svmla_x (pg, p01, z4, p23); ++ svfloat32_t p47 = svmla_x (pg, p45, z4, p67); ++ ++ svfloat32_t poly = svmla_x (pg, p03, z8, p47); ++ ++ /* ret = shift + z + z^3 * P(z^2). */ ++ svfloat32_t ret = svmla_x (pg, z, shift, sv_f32 (d->pi_over_2)); ++ ret = svmla_x (pg, ret, z3, poly); + + /* Account for the sign of x and y. */ + +diff --git a/sysdeps/aarch64/fpu/atan_advsimd.c b/sysdeps/aarch64/fpu/atan_advsimd.c +index 14f1809796f05246..f835dae828ae9d3a 100644 +--- a/sysdeps/aarch64/fpu/atan_advsimd.c ++++ b/sysdeps/aarch64/fpu/atan_advsimd.c +@@ -18,7 +18,6 @@ + . */ + + #include "v_math.h" +-#include "poly_advsimd_f64.h" + + static const struct data + { +@@ -28,16 +27,16 @@ static const struct data + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-1022, 1.0]. */ +- .c0 = V2 (-0x1.5555555555555p-2), .c1 = 0x1.99999999996c1p-3, +- .c2 = V2 (-0x1.2492492478f88p-3), .c3 = 0x1.c71c71bc3951cp-4, +- .c4 = V2 (-0x1.745d160a7e368p-4), .c5 = 0x1.3b139b6a88ba1p-4, +- .c6 = V2 (-0x1.11100ee084227p-4), .c7 = 0x1.e1d0f9696f63bp-5, +- .c8 = V2 (-0x1.aebfe7b418581p-5), .c9 = 0x1.842dbe9b0d916p-5, +- .c10 = V2 (-0x1.5d30140ae5e99p-5), .c11 = 0x1.338e31eb2fbbcp-5, +- .c12 = V2 (-0x1.00e6eece7de8p-5), .c13 = 0x1.860897b29e5efp-6, +- .c14 = V2 (-0x1.0051381722a59p-6), .c15 = 0x1.14e9dc19a4a4ep-7, +- .c16 = V2 (-0x1.d0062b42fe3bfp-9), .c17 = 0x1.17739e210171ap-10, +- .c18 = V2 (-0x1.ab24da7be7402p-13), .c19 = 0x1.358851160a528p-16, ++ .c0 = V2 (-0x1.555555555552ap-2), .c1 = 0x1.9999999995aebp-3, ++ .c2 = V2 (-0x1.24924923923f6p-3), .c3 = 0x1.c71c7184288a2p-4, ++ .c4 = V2 (-0x1.745d11fb3d32bp-4), .c5 = 0x1.3b136a18051b9p-4, ++ .c6 = V2 (-0x1.110e6d985f496p-4), .c7 = 0x1.e1bcf7f08801dp-5, ++ .c8 = V2 (-0x1.ae644e28058c3p-5), .c9 = 0x1.82eeb1fed85c6p-5, ++ .c10 = V2 (-0x1.59d7f901566cbp-5), .c11 = 0x1.2c982855ab069p-5, ++ .c12 = V2 (-0x1.eb49592998177p-6), .c13 = 0x1.69d8b396e3d38p-6, ++ .c14 = V2 (-0x1.ca980345c4204p-7), .c15 = 0x1.dc050eafde0b3p-8, ++ .c16 = V2 (-0x1.7ea70755b8eccp-9), .c17 = 0x1.ba3da3de903e8p-11, ++ .c18 = V2 (-0x1.44a4b059b6f67p-13), .c19 = 0x1.c4a45029e5a91p-17, + .pi_over_2 = V2 (0x1.921fb54442d18p+0), + }; + +@@ -47,9 +46,9 @@ static const struct data + + /* Fast implementation of vector atan. + Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using +- z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps: +- _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 +- want 0x1.9225645bdd7c3p-1. */ ++ z=1/x and shift = pi/2. Maximum observed error is 2.45 ulps: ++ _ZGVnN2v_atan (0x1.0008d737eb3e6p+0) got 0x1.92288c551a4c1p-1 ++ want 0x1.92288c551a4c3p-1. */ + float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) + { + const struct data *d = ptr_barrier (&data); +@@ -78,59 +77,53 @@ float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x) + y := arctan(x) for x < 1 + y := pi/2 + arctan(-1/x) for x > 1 + Hence, use z=-1/a if x>=1, otherwise z=a. */ +- uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0)); ++ uint64x2_t red = vcagtq_f64 (x, v_f64 (-1.0)); + /* Avoid dependency in abs(x) in division (and comparison). */ +- float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x); ++ float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (-1.0), x), x); ++ + float64x2_t shift = vreinterpretq_f64_u64 ( + vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2))); +- /* Use absolute value only when needed (odd powers of z). */ +- float64x2_t az = vbslq_f64 ( +- SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z); +- +- /* Calculate the polynomial approximation. +- Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of +- full scheme to avoid underflow in x^16. +- The order 19 polynomial P approximates +- (atan(sqrt(x))-sqrt(x))/x^(3/2). */ ++ ++ /* Reinsert sign bit from argument into the shift value. */ ++ shift = vreinterpretq_f64_u64 ( ++ veorq_u64 (vreinterpretq_u64_f64 (shift), sign)); ++ ++ /* Calculate polynomial approximation P(z^2) with deg(P)=19. */ + float64x2_t z2 = vmulq_f64 (z, z); +- float64x2_t x2 = vmulq_f64 (z2, z2); +- float64x2_t x4 = vmulq_f64 (x2, x2); +- float64x2_t x8 = vmulq_f64 (x4, x4); ++ float64x2_t z4 = vmulq_f64 (z2, z2); ++ float64x2_t z8 = vmulq_f64 (z4, z4); ++ float64x2_t z16 = vmulq_f64 (z8, z8); + +- /* estrin_7. */ ++ /* Order-7 Estrin. */ + float64x2_t p01 = vfmaq_laneq_f64 (d->c0, z2, c13, 0); + float64x2_t p23 = vfmaq_laneq_f64 (d->c2, z2, c13, 1); +- float64x2_t p03 = vfmaq_f64 (p01, x2, p23); ++ float64x2_t p03 = vfmaq_f64 (p01, z4, p23); + + float64x2_t p45 = vfmaq_laneq_f64 (d->c4, z2, c57, 0); + float64x2_t p67 = vfmaq_laneq_f64 (d->c6, z2, c57, 1); +- float64x2_t p47 = vfmaq_f64 (p45, x2, p67); ++ float64x2_t p47 = vfmaq_f64 (p45, z4, p67); + +- float64x2_t p07 = vfmaq_f64 (p03, x4, p47); ++ float64x2_t p07 = vfmaq_f64 (p03, z8, p47); + +- /* estrin_11. */ ++ /* Order-11 Estrin. */ + float64x2_t p89 = vfmaq_laneq_f64 (d->c8, z2, c911, 0); + float64x2_t p1011 = vfmaq_laneq_f64 (d->c10, z2, c911, 1); +- float64x2_t p811 = vfmaq_f64 (p89, x2, p1011); ++ float64x2_t p811 = vfmaq_f64 (p89, z4, p1011); + + float64x2_t p1213 = vfmaq_laneq_f64 (d->c12, z2, c1315, 0); + float64x2_t p1415 = vfmaq_laneq_f64 (d->c14, z2, c1315, 1); +- float64x2_t p1215 = vfmaq_f64 (p1213, x2, p1415); ++ float64x2_t p1215 = vfmaq_f64 (p1213, z4, p1415); + + float64x2_t p1617 = vfmaq_laneq_f64 (d->c16, z2, c1719, 0); + float64x2_t p1819 = vfmaq_laneq_f64 (d->c18, z2, c1719, 1); +- float64x2_t p1619 = vfmaq_f64 (p1617, x2, p1819); ++ float64x2_t p1619 = vfmaq_f64 (p1617, z4, p1819); + +- float64x2_t p815 = vfmaq_f64 (p811, x4, p1215); +- float64x2_t p819 = vfmaq_f64 (p815, x8, p1619); ++ float64x2_t p815 = vfmaq_f64 (p811, z8, p1215); ++ float64x2_t p819 = vfmaq_f64 (p815, z16, p1619); + +- float64x2_t y = vfmaq_f64 (p07, p819, x8); ++ float64x2_t y = vfmaq_f64 (p07, p819, z16); + + /* Finalize. y = shift + z + z^3 * P(z^2). */ +- y = vfmaq_f64 (az, y, vmulq_f64 (z2, az)); +- y = vaddq_f64 (y, shift); +- +- /* y = atan(x) if x>0, -atan(-x) otherwise. */ +- y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign)); +- return y; ++ y = vfmsq_f64 (v_f64 (-1.0), z2, y); ++ return vfmsq_f64 (shift, z, y); + } +diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c +index fa1630304cd43ce4..046c04fbb197b75a 100644 +--- a/sysdeps/aarch64/fpu/atan_sve.c ++++ b/sysdeps/aarch64/fpu/atan_sve.c +@@ -18,23 +18,26 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f64.h" + + static const struct data + { +- float64_t poly[20]; +- float64_t pi_over_2; ++ float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16, c18; ++ float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c19; ++ float64_t shift_val, neg_one; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-1022, 1.0]. */ +- .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3, +- 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, +- -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5, +- 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, +- -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6, +- 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, +- -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, }, +- .pi_over_2 = 0x1.921fb54442d18p+0, ++ .c0 = -0x1.555555555552ap-2, .c1 = 0x1.9999999995aebp-3, ++ .c2 = -0x1.24924923923f6p-3, .c3 = 0x1.c71c7184288a2p-4, ++ .c4 = -0x1.745d11fb3d32bp-4, .c5 = 0x1.3b136a18051b9p-4, ++ .c6 = -0x1.110e6d985f496p-4, .c7 = 0x1.e1bcf7f08801dp-5, ++ .c8 = -0x1.ae644e28058c3p-5, .c9 = 0x1.82eeb1fed85c6p-5, ++ .c10 = -0x1.59d7f901566cbp-5, .c11 = 0x1.2c982855ab069p-5, ++ .c12 = -0x1.eb49592998177p-6, .c13 = 0x1.69d8b396e3d38p-6, ++ .c14 = -0x1.ca980345c4204p-7, .c15 = 0x1.dc050eafde0b3p-8, ++ .c16 = -0x1.7ea70755b8eccp-9, .c17 = 0x1.ba3da3de903e8p-11, ++ .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17, ++ .shift_val = 0x1.490fdaa22168cp+1, .neg_one = -1, + }; + + /* Useful constants. */ +@@ -43,15 +46,14 @@ static const struct data + /* Fast implementation of SVE atan. + Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using + z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed +- error is 2.27 ulps: +- _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1 +- want 0x1.9225645bdd7c3p-1. */ ++ error is 2.08 ulps: ++ _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1 ++ want 0x1.922a3163e15c4p-1. */ + svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) + { + const struct data *d = ptr_barrier (&data); + +- /* No need to trigger special case. Small cases, infs and nans +- are supported by our approximation technique. */ ++ svbool_t ptrue = svptrue_b64 (); + svuint64_t ix = svreinterpret_u64 (x); + svuint64_t sign = svand_x (pg, ix, SignMask); + +@@ -59,32 +61,60 @@ svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg) + y := arctan(x) for x < 1 + y := pi/2 + arctan(-1/x) for x > 1 + Hence, use z=-1/a if x>=1, otherwise z=a. */ +- svbool_t red = svacgt (pg, x, 1.0); +- /* Avoid dependency in abs(x) in division (and comparison). */ +- svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x); +- /* Use absolute value only when needed (odd powers of z). */ +- svfloat64_t az = svabs_x (pg, z); +- az = svneg_m (az, red, az); ++ svbool_t red = svacgt (pg, x, d->neg_one); ++ svfloat64_t z = svsel (red, svdiv_x (pg, sv_f64 (d->neg_one), x), x); ++ ++ /* Reuse of -1.0f to reduce constant loads, ++ We need a shift value of 1/2, which is created via -1 + (1 + 1/2). */ ++ svfloat64_t shift ++ = svadd_z (red, sv_f64 (d->neg_one), sv_f64 (d->shift_val)); ++ ++ /* Reinserts the sign bit of the argument to handle the case of x < -1. */ ++ shift = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (shift), sign)); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ +- svfloat64_t z2 = svmul_x (pg, z, z); +- svfloat64_t x2 = svmul_x (pg, z2, z2); +- svfloat64_t x4 = svmul_x (pg, x2, x2); +- svfloat64_t x8 = svmul_x (pg, x4, x4); ++ svfloat64_t z2 = svmul_x (ptrue, z, z); ++ svfloat64_t z4 = svmul_x (ptrue, z2, z2); ++ svfloat64_t z8 = svmul_x (ptrue, z4, z4); ++ svfloat64_t z16 = svmul_x (ptrue, z8, z8); + +- svfloat64_t y +- = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly), +- sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8); ++ /* Order-7 Estrin. */ ++ svfloat64_t c13 = svld1rq (ptrue, &d->c1); ++ svfloat64_t c57 = svld1rq (ptrue, &d->c5); + +- /* y = shift + z + z^3 * P(z^2). */ +- svfloat64_t z3 = svmul_x (pg, z2, az); +- y = svmla_x (pg, az, z3, y); ++ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), z2, c13, 0); ++ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), z2, c13, 1); ++ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), z2, c57, 0); ++ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), z2, c57, 1); ++ ++ svfloat64_t p03 = svmla_x (pg, p01, z4, p23); ++ svfloat64_t p47 = svmla_x (pg, p45, z4, p67); ++ svfloat64_t p07 = svmla_x (pg, p03, z8, p47); ++ ++ /* Order-11 Estrin. */ ++ svfloat64_t c911 = svld1rq (ptrue, &d->c9); ++ svfloat64_t c1315 = svld1rq (ptrue, &d->c13); ++ svfloat64_t c1719 = svld1rq (ptrue, &d->c17); + +- /* Apply shift as indicated by `red` predicate. */ +- y = svadd_m (red, y, d->pi_over_2); ++ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), z2, c911, 0); ++ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), z2, c911, 1); ++ svfloat64_t p811 = svmla_x (pg, p89, z4, p1011); + +- /* y = atan(x) if x>0, -atan(-x) otherwise. */ +- y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign)); ++ svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), z2, c1315, 0); ++ svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), z2, c1315, 1); ++ svfloat64_t p1215 = svmla_x (pg, p1213, z4, p1415); + +- return y; ++ svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), z2, c1719, 0); ++ svfloat64_t p1819 = svmla_lane (sv_f64 (d->c18), z2, c1719, 1); ++ svfloat64_t p1619 = svmla_x (pg, p1617, z4, p1819); ++ ++ svfloat64_t p815 = svmla_x (pg, p811, z8, p1215); ++ svfloat64_t p819 = svmla_x (pg, p815, z16, p1619); ++ ++ svfloat64_t y = svmla_x (pg, p07, z16, p819); ++ ++ /* y = shift + z + z^3 * P(z^2). */ ++ shift = svadd_m (red, z, shift); ++ y = svmul_x (pg, z2, y); ++ return svmla_x (pg, shift, z, y); + } +diff --git a/sysdeps/aarch64/fpu/atanf_advsimd.c b/sysdeps/aarch64/fpu/atanf_advsimd.c +index d015cc70ad4a369c..e72074ec6206e572 100644 +--- a/sysdeps/aarch64/fpu/atanf_advsimd.c ++++ b/sysdeps/aarch64/fpu/atanf_advsimd.c +@@ -22,26 +22,35 @@ + + static const struct data + { ++ uint32x4_t sign_mask, pi_over_2; ++ float32x4_t neg_one; ++#if WANT_SIMD_EXCEPT + float32x4_t poly[8]; +- float32x4_t pi_over_2; ++} data = { ++ .poly = { V4 (-0x1.5554dcp-2), V4 (0x1.9978ecp-3), V4 (-0x1.230a94p-3), ++ V4 (0x1.b4debp-4), V4 (-0x1.3550dap-4), V4 (0x1.61eebp-5), ++ V4 (-0x1.0c17d4p-6), V4 (0x1.7ea694p-9) }, ++#else ++ float32x4_t c0, c2, c4, c6; ++ float c1, c3, c5, c7; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-128, 1.0]. + Generated using fpminimax between FLT_MIN and 1. */ +- .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f), +- V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f), +- V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) }, +- .pi_over_2 = V4 (0x1.921fb6p+0f), ++ .c0 = V4 (-0x1.5554dcp-2), .c1 = 0x1.9978ecp-3, ++ .c2 = V4 (-0x1.230a94p-3), .c3 = 0x1.b4debp-4, ++ .c4 = V4 (-0x1.3550dap-4), .c5 = 0x1.61eebp-5, ++ .c6 = V4 (-0x1.0c17d4p-6), .c7 = 0x1.7ea694p-9, ++#endif ++ .pi_over_2 = V4 (0x3fc90fdb), ++ .neg_one = V4 (-1.0f), ++ .sign_mask = V4 (0x80000000), + }; + +-#define SignMask v_u32 (0x80000000) +- +-#define P(i) d->poly[i] +- ++#if WANT_SIMD_EXCEPT + #define TinyBound 0x30800000 /* asuint(0x1p-30). */ + #define BigBound 0x4e800000 /* asuint(0x1p30). */ + +-#if WANT_SIMD_EXCEPT + static float32x4_t VPCS_ATTR NOINLINE + special_case (float32x4_t x, float32x4_t y, uint32x4_t special) + { +@@ -51,19 +60,20 @@ special_case (float32x4_t x, float32x4_t y, uint32x4_t special) + + /* Fast implementation of vector atanf based on + atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] +- using z=-1/x and shift = pi/2. Maximum observed error is 2.9ulps: +- _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */ ++ using z=-1/x and shift = pi/2. Maximum observed error is 2.02 ulps: ++ _ZGVnN4v_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 ++ want 0x1.95ed36p-1. */ + float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) + { + const struct data *d = ptr_barrier (&data); + +- /* Small cases, infs and nans are supported by our approximation technique, +- but do not set fenv flags correctly. Only trigger special case if we need +- fenv. */ + uint32x4_t ix = vreinterpretq_u32_f32 (x); +- uint32x4_t sign = vandq_u32 (ix, SignMask); ++ uint32x4_t sign = vandq_u32 (ix, d->sign_mask); + + #if WANT_SIMD_EXCEPT ++ /* Small cases, infs and nans are supported by our approximation technique, ++ but do not set fenv flags correctly. Only trigger special case if we need ++ fenv. */ + uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000)); + uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)), + v_u32 (BigBound - TinyBound)); +@@ -71,41 +81,52 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (atan) (float32x4_t x) + if (__glibc_unlikely (v_any_u32 (special))) + return special_case (x, x, v_u32 (-1)); + #endif +- + /* Argument reduction: +- y := arctan(x) for x < 1 +- y := pi/2 + arctan(-1/x) for x > 1 +- Hence, use z=-1/a if x>=1, otherwise z=a. */ +- uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0)); +- /* Avoid dependency in abs(x) in division (and comparison). */ +- float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x); ++ y := arctan(x) for |x| < 1 ++ y := arctan(-1/x) + pi/2 for x > +1 ++ y := arctan(-1/x) - pi/2 for x < -1 ++ Hence, use z=-1/a if x>=|-1|, otherwise z=a. */ ++ uint32x4_t red = vcagtq_f32 (x, d->neg_one); ++ ++ float32x4_t z = vbslq_f32 (red, vdivq_f32 (d->neg_one, x), x); ++ ++ /* Shift is calculated as +-pi/2 or 0, depending on the argument case. */ + float32x4_t shift = vreinterpretq_f32_u32 ( +- vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2))); +- /* Use absolute value only when needed (odd powers of z). */ +- float32x4_t az = vbslq_f32 ( +- SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z); ++ vandq_u32 (red, veorq_u32 (d->pi_over_2, sign))); ++ ++ float32x4_t z2 = vmulq_f32 (z, z); ++ float32x4_t z3 = vmulq_f32 (z, z2); ++ float32x4_t z4 = vmulq_f32 (z2, z2); ++#if WANT_SIMD_EXCEPT + + /* Calculate the polynomial approximation. + Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, + a standard implementation using z8 creates spurious underflow + in the very last fma (when z^8 is small enough). +- Therefore, we split the last fma into a mul and an fma. +- Horner and single-level Estrin have higher errors that exceed +- threshold. */ +- float32x4_t z2 = vmulq_f32 (z, z); +- float32x4_t z4 = vmulq_f32 (z2, z2); +- ++ Therefore, we split the last fma into a mul and an fma. */ + float32x4_t y = vfmaq_f32 ( + v_pairwise_poly_3_f32 (z2, z4, d->poly), z4, + vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4))); + +- /* y = shift + z * P(z^2). */ +- y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift); ++#else ++ float32x4_t z8 = vmulq_f32 (z4, z4); ++ ++ /* Uses an Estrin scheme for polynomial approximation. */ ++ float32x4_t odd_coeffs = vld1q_f32 (&d->c1); ++ ++ float32x4_t p01 = vfmaq_laneq_f32 (d->c0, z2, odd_coeffs, 0); ++ float32x4_t p23 = vfmaq_laneq_f32 (d->c2, z2, odd_coeffs, 1); ++ float32x4_t p45 = vfmaq_laneq_f32 (d->c4, z2, odd_coeffs, 2); ++ float32x4_t p67 = vfmaq_laneq_f32 (d->c6, z2, odd_coeffs, 3); + +- /* y = atan(x) if x>0, -atan(-x) otherwise. */ +- y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign)); ++ float32x4_t p03 = vfmaq_f32 (p01, z4, p23); ++ float32x4_t p47 = vfmaq_f32 (p45, z4, p67); + +- return y; ++ float32x4_t y = vfmaq_f32 (p03, z8, p47); ++#endif ++ ++ /* y = shift + z * P(z^2). */ ++ return vfmaq_f32 (vaddq_f32 (shift, z), z3, y); + } + libmvec_hidden_def (V_NAME_F1 (atan)) + HALF_WIDTH_ALIAS_F1 (atan) +diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c +index 7b54094586c20590..0f5a054743dc47d3 100644 +--- a/sysdeps/aarch64/fpu/atanf_sve.c ++++ b/sysdeps/aarch64/fpu/atanf_sve.c +@@ -18,18 +18,26 @@ + . */ + + #include "sv_math.h" +-#include "poly_sve_f32.h" + + static const struct data + { +- float32_t poly[8]; +- float32_t pi_over_2; ++ float32_t c1, c3, c5, c7; ++ float32_t c0, c2, c4, c6; ++ float32_t shift_val, neg_one; + } data = { + /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on + [2**-128, 1.0]. */ +- .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, +- -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f }, +- .pi_over_2 = 0x1.921fb6p+0f, ++ .c0 = -0x1.5554dcp-2, ++ .c1 = 0x1.9978ecp-3, ++ .c2 = -0x1.230a94p-3, ++ .c3 = 0x1.b4debp-4, ++ .c4 = -0x1.3550dap-4, ++ .c5 = 0x1.61eebp-5, ++ .c6 = -0x1.0c17d4p-6, ++ .c7 = 0x1.7ea694p-9, ++ /* pi/2, used as a shift value after reduction. */ ++ .shift_val = 0x1.921fb54442d18p+0, ++ .neg_one = -1.0f, + }; + + #define SignMask (0x80000000) +@@ -37,43 +45,49 @@ static const struct data + /* Fast implementation of SVE atanf based on + atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using + z=-1/x and shift = pi/2. +- Largest observed error is 2.9 ULP, close to +/-1.0: +- _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1 +- want -0x1.967fp-1. */ ++ Largest observed error is 2.12 ULP: ++ _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1 ++ want 0x1.95ed36p-1. */ + svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg) + { + const struct data *d = ptr_barrier (&data); ++ svbool_t ptrue = svptrue_b32 (); + + /* No need to trigger special case. Small cases, infs and nans + are supported by our approximation technique. */ + svuint32_t ix = svreinterpret_u32 (x); +- svuint32_t sign = svand_x (pg, ix, SignMask); ++ svuint32_t sign = svand_x (ptrue, ix, SignMask); + + /* Argument reduction: + y := arctan(x) for x < 1 +- y := pi/2 + arctan(-1/x) for x > 1 +- Hence, use z=-1/a if x>=1, otherwise z=a. */ +- svbool_t red = svacgt (pg, x, 1.0f); +- /* Avoid dependency in abs(x) in division (and comparison). */ +- svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x); +- /* Use absolute value only when needed (odd powers of z). */ +- svfloat32_t az = svabs_x (pg, z); +- az = svneg_m (az, red, az); +- +- /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ +- svfloat32_t z2 = svmul_x (pg, z, z); +- svfloat32_t z4 = svmul_x (pg, z2, z2); +- svfloat32_t z8 = svmul_x (pg, z4, z4); +- +- svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly); +- +- /* y = shift + z + z^3 * P(z^2). */ +- svfloat32_t z3 = svmul_x (pg, z2, az); +- y = svmla_x (pg, az, z3, y); +- +- /* Apply shift as indicated by 'red' predicate. */ +- y = svadd_m (red, y, sv_f32 (d->pi_over_2)); +- +- /* y = atan(x) if x>0, -atan(-x) otherwise. */ +- return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign)); ++ y := arctan(-1/x) + pi/2 for x > +1 ++ y := arctan(-1/x) - pi/2 for x < -1 ++ Hence, use z=-1/a if |x|>=|-1|, otherwise z=a. */ ++ svbool_t red = svacgt (pg, x, d->neg_one); ++ svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (d->neg_one), x), x); ++ ++ /* Reinserts the sign bit of the argument to handle the case of x < -1. */ ++ svfloat32_t shift = svreinterpret_f32 ( ++ sveor_x (red, svreinterpret_u32 (sv_f32 (d->shift_val)), sign)); ++ ++ svfloat32_t z2 = svmul_x (ptrue, z, z); ++ svfloat32_t z3 = svmul_x (ptrue, z2, z); ++ svfloat32_t z4 = svmul_x (ptrue, z2, z2); ++ svfloat32_t z8 = svmul_x (ptrue, z4, z4); ++ ++ svfloat32_t odd_coeffs = svld1rq (ptrue, &d->c1); ++ ++ svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0); ++ svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1); ++ svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2); ++ svfloat32_t p67 = svmla_lane (sv_f32 (d->c6), z2, odd_coeffs, 3); ++ ++ svfloat32_t p03 = svmla_x (pg, p01, z4, p23); ++ svfloat32_t p47 = svmla_x (pg, p45, z4, p67); ++ ++ svfloat32_t y = svmla_x (pg, p03, z8, p47); ++ ++ /* shift + z + z^3 * P(z^2). */ ++ shift = svadd_m (red, z, shift); ++ return svmla_x (pg, shift, z3, y); + } diff --git a/glibc-upstream-2.39-215.patch b/glibc-upstream-2.39-215.patch new file mode 100644 index 0000000..75c81e8 --- /dev/null +++ b/glibc-upstream-2.39-215.patch @@ -0,0 +1,135 @@ +commit 392e6cf1e86e29fe155c21351cd7b7a0fd371f5b +Author: Luna Lamb +Date: Thu May 29 15:22:51 2025 +0000 + + AArch64: Improve codegen in SVE log1p + + Improves memory access, reformat evaluation scheme to pack coefficients. + 5% improvement in throughput microbenchmark on Neoverse V1. + + Reviewed-by: Wilco Dijkstra + (cherry picked from commit da196e6134ede64728006518352d75b6c3902fec) + +diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c +index 04f7e5720e13c371..5251f3c07566eec3 100644 +--- a/sysdeps/aarch64/fpu/log1p_sve.c ++++ b/sysdeps/aarch64/fpu/log1p_sve.c +@@ -22,19 +22,33 @@ + + static const struct data + { +- double poly[19]; ++ float64_t c0, c2, c4, c6, c8, c10, c12, c14, c16; ++ float64_t c1, c3, c5, c7, c9, c11, c13, c15, c17, c18; + double ln2_hi, ln2_lo; + uint64_t hfrt2_top, onemhfrt2_top, inf, mone; + } data = { + /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20 +- polynomial, however first 2 coefficients are 0 and 1 so are not stored. */ +- .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, +- 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, +- -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, +- 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, +- -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, +- 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, +- -0x1.cfa7385bdb37ep-6, }, ++ polynomial, however first 2 coefficients are 0 and 1 so are not ++ stored. */ ++ .c0 = -0x1.ffffffffffffbp-2, ++ .c1 = 0x1.55555555551a9p-2, ++ .c2 = -0x1.00000000008e3p-2, ++ .c3 = 0x1.9999999a32797p-3, ++ .c4 = -0x1.555555552fecfp-3, ++ .c5 = 0x1.249248e071e5ap-3, ++ .c6 = -0x1.ffffff8bf8482p-4, ++ .c7 = 0x1.c71c8f07da57ap-4, ++ .c8 = -0x1.9999ca4ccb617p-4, ++ .c9 = 0x1.7459ad2e1dfa3p-4, ++ .c10 = -0x1.554d2680a3ff2p-4, ++ .c11 = 0x1.3b4c54d487455p-4, ++ .c12 = -0x1.2548a9ffe80e6p-4, ++ .c13 = 0x1.0f389a24b2e07p-4, ++ .c14 = -0x1.eee4db15db335p-5, ++ .c15 = 0x1.e95b494d4a5ddp-5, ++ .c16 = -0x1.15fdf07cb7c73p-4, ++ .c17 = 0x1.0310b70800fcfp-4, ++ .c18 = -0x1.cfa7385bdb37ep-6, + .ln2_hi = 0x1.62e42fefa3800p-1, + .ln2_lo = 0x1.ef35793c76730p-45, + /* top32(asuint64(sqrt(2)/2)) << 32. */ +@@ -49,7 +63,7 @@ static const struct data + #define BottomMask 0xffffffff + + static svfloat64_t NOINLINE +-special_case (svbool_t special, svfloat64_t x, svfloat64_t y) ++special_case (svfloat64_t x, svfloat64_t y, svbool_t special) + { + return sv_call_f64 (log1p, x, y, special); + } +@@ -91,8 +105,9 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) + /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */ + svuint64_t utop + = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top); +- svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask)); +- svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1); ++ svuint64_t u_red ++ = svorr_x (pg, utop, svand_x (svptrue_b64 (), mi, BottomMask)); ++ svfloat64_t f = svsub_x (svptrue_b64 (), svreinterpret_f64 (u_red), 1); + + /* Correction term c/m. */ + svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m); +@@ -103,16 +118,47 @@ svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg) + Hence approximation has the form f + f^2 * P(f) + where P(x) = C0 + C1*x + C2x^2 + ... + Assembling this all correctly is dealt with at the final step. */ +- svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2), +- f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8); +- svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly); ++ svfloat64_t f2 = svmul_x (svptrue_b64 (), f, f), ++ f4 = svmul_x (svptrue_b64 (), f2, f2), ++ f8 = svmul_x (svptrue_b64 (), f4, f4), ++ f16 = svmul_x (svptrue_b64 (), f8, f8); ++ ++ svfloat64_t c13 = svld1rq (svptrue_b64 (), &d->c1); ++ svfloat64_t c57 = svld1rq (svptrue_b64 (), &d->c5); ++ svfloat64_t c911 = svld1rq (svptrue_b64 (), &d->c9); ++ svfloat64_t c1315 = svld1rq (svptrue_b64 (), &d->c13); ++ svfloat64_t c1718 = svld1rq (svptrue_b64 (), &d->c17); ++ ++ /* Order-18 Estrin scheme. */ ++ svfloat64_t p01 = svmla_lane (sv_f64 (d->c0), f, c13, 0); ++ svfloat64_t p23 = svmla_lane (sv_f64 (d->c2), f, c13, 1); ++ svfloat64_t p45 = svmla_lane (sv_f64 (d->c4), f, c57, 0); ++ svfloat64_t p67 = svmla_lane (sv_f64 (d->c6), f, c57, 1); ++ ++ svfloat64_t p03 = svmla_x (pg, p01, f2, p23); ++ svfloat64_t p47 = svmla_x (pg, p45, f2, p67); ++ svfloat64_t p07 = svmla_x (pg, p03, f4, p47); ++ ++ svfloat64_t p89 = svmla_lane (sv_f64 (d->c8), f, c911, 0); ++ svfloat64_t p1011 = svmla_lane (sv_f64 (d->c10), f, c911, 1); ++ svfloat64_t p1213 = svmla_lane (sv_f64 (d->c12), f, c1315, 0); ++ svfloat64_t p1415 = svmla_lane (sv_f64 (d->c14), f, c1315, 1); ++ ++ svfloat64_t p811 = svmla_x (pg, p89, f2, p1011); ++ svfloat64_t p1215 = svmla_x (pg, p1213, f2, p1415); ++ svfloat64_t p815 = svmla_x (pg, p811, f4, p1215); ++ ++ svfloat64_t p015 = svmla_x (pg, p07, f8, p815); ++ svfloat64_t p1617 = svmla_lane (sv_f64 (d->c16), f, c1718, 0); ++ svfloat64_t p1618 = svmla_lane (p1617, f2, c1718, 1); ++ svfloat64_t p = svmla_x (pg, p015, f16, p1618); + + svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo); + svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi); +- svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p); + + if (__glibc_unlikely (svptest_any (pg, special))) +- return special_case (special, x, y); +- +- return y; ++ return special_case ( ++ x, svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p), ++ special); ++ return svmla_x (svptrue_b64 (), svadd_x (svptrue_b64 (), ylo, yhi), f2, p); + } diff --git a/glibc-upstream-2.39-216.patch b/glibc-upstream-2.39-216.patch new file mode 100644 index 0000000..027370d --- /dev/null +++ b/glibc-upstream-2.39-216.patch @@ -0,0 +1,193 @@ +commit fbade65338cff0a3a1699a8627a8180e9a01a627 +Author: Adhemerval Zanella +Date: Thu Feb 22 10:42:55 2024 -0300 + + arm: Use _dl_find_object on __gnu_Unwind_Find_exidx (BZ 31405) + + Instead of __dl_iterate_phdr. On ARM dlfo_eh_frame/dlfo_eh_count + maps to PT_ARM_EXIDX vaddr start / length. + + On a Neoverse N1 machine with 160 cores, the following program: + + $ cat test.c + #include + #include + #include + + enum { + niter = 1024, + ntimes = 128, + }; + + static void * + tf (void *arg) + { + int a = (int) arg; + + for (int i = 0; i < niter; i++) + { + void *p[ntimes]; + for (int j = 0; j < ntimes; j++) + p[j] = malloc (a * 128); + for (int j = 0; j < ntimes; j++) + free (p[j]); + } + + return NULL; + } + + int main (int argc, char *argv[]) + { + enum { nthreads = 16 }; + pthread_t t[nthreads]; + + for (int i = 0; i < nthreads; i ++) + assert (pthread_create (&t[i], NULL, tf, (void *) i) == 0); + + for (int i = 0; i < nthreads; i++) + { + void *r; + assert (pthread_join (t[i], &r) == 0); + assert (r == NULL); + } + + return 0; + } + $ arm-linux-gnueabihf-gcc -fsanitize=address test.c -o test + + Improves from ~15s to 0.5s. + + Checked on arm-linux-gnueabihf. + + (cherry picked from commit f4c142bb9fe6b02c0af8cfca8a920091e2dba44b) + +diff --git a/elf/Makefile b/elf/Makefile +index 92da608da1ebc175..f5646c434f19a667 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -34,6 +34,7 @@ routines = \ + dl-addr \ + dl-addr-obj \ + dl-early_allocate \ ++ dl-find_object \ + dl-iteratephdr \ + dl-libc \ + dl-origin \ +@@ -60,7 +61,6 @@ dl-routines = \ + dl-deps \ + dl-exception \ + dl-execstack \ +- dl-find_object \ + dl-fini \ + dl-init \ + dl-load \ +diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c +index 940fa5c2236af666..449302eda35ce96f 100644 +--- a/elf/dl-find_object.c ++++ b/elf/dl-find_object.c +@@ -356,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size) + } + + int +-_dl_find_object (void *pc1, struct dl_find_object *result) ++__dl_find_object (void *pc1, struct dl_find_object *result) + { + uintptr_t pc = (uintptr_t) pc1; + +@@ -463,7 +463,8 @@ _dl_find_object (void *pc1, struct dl_find_object *result) + return -1; + } /* Transaction retry loop. */ + } +-rtld_hidden_def (_dl_find_object) ++hidden_def (__dl_find_object) ++weak_alias (__dl_find_object, _dl_find_object) + + /* _dlfo_process_initial is called twice. First to compute the array + sizes from the initial loaded mappings. Second to fill in the +diff --git a/include/dlfcn.h b/include/dlfcn.h +index a44420fa37439a85..f49ee1b0c9958d38 100644 +--- a/include/dlfcn.h ++++ b/include/dlfcn.h +@@ -4,7 +4,8 @@ + #include /* For ElfW. */ + #include + +-rtld_hidden_proto (_dl_find_object) ++extern __typeof (_dl_find_object) __dl_find_object; ++hidden_proto (__dl_find_object) + + /* Internally used flag. */ + #define __RTLD_DLOPEN 0x80000000 +diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c +index d647865e5a098cd5..a924d59b9f75d7b2 100644 +--- a/sysdeps/arm/find_exidx.c ++++ b/sysdeps/arm/find_exidx.c +@@ -16,64 +16,15 @@ + . */ + + #include +-#include +- +-struct unw_eh_callback_data +-{ +- _Unwind_Ptr pc; +- _Unwind_Ptr exidx_start; +- int exidx_len; +-}; +- +- +-/* Callback to determines if the PC lies within an object, and remember the +- location of the exception index table if it does. */ +- +-static int +-find_exidx_callback (struct dl_phdr_info * info, size_t size, void * ptr) +-{ +- struct unw_eh_callback_data * data; +- const ElfW(Phdr) *phdr; +- int i; +- int match; +- _Unwind_Ptr load_base; +- +- data = (struct unw_eh_callback_data *) ptr; +- load_base = info->dlpi_addr; +- phdr = info->dlpi_phdr; +- +- match = 0; +- for (i = info->dlpi_phnum; i > 0; i--, phdr++) +- { +- if (phdr->p_type == PT_LOAD) +- { +- _Unwind_Ptr vaddr = phdr->p_vaddr + load_base; +- if (data->pc >= vaddr && data->pc < vaddr + phdr->p_memsz) +- match = 1; +- } +- else if (phdr->p_type == PT_ARM_EXIDX) +- { +- data->exidx_start = (_Unwind_Ptr) (phdr->p_vaddr + load_base); +- data->exidx_len = phdr->p_memsz; +- } +- } +- +- return match; +-} +- + + /* Find the exception index table containing PC. */ + + _Unwind_Ptr + __gnu_Unwind_Find_exidx (_Unwind_Ptr pc, int * pcount) + { +- struct unw_eh_callback_data data; +- +- data.pc = pc; +- data.exidx_start = 0; +- if (__dl_iterate_phdr (find_exidx_callback, &data) <= 0) ++ struct dl_find_object data; ++ if (__dl_find_object ((void *) pc, &data) < 0) + return 0; +- +- *pcount = data.exidx_len / 8; +- return data.exidx_start; ++ *pcount = data.dlfo_eh_count; ++ return (_Unwind_Ptr) data.dlfo_eh_frame; + } diff --git a/glibc-upstream-2.39-217.patch b/glibc-upstream-2.39-217.patch new file mode 100644 index 0000000..b185066 --- /dev/null +++ b/glibc-upstream-2.39-217.patch @@ -0,0 +1,93 @@ +commit 9833fcf7ce7a7ac7ec5b7694fd4c2bc705282842 +Author: Florian Weimer +Date: Sat Feb 1 12:37:58 2025 +0100 + + elf: Do not add a copy of _dl_find_object to libc.so + + This reduces code size and dependencies on ld.so internals from + libc.so. + + Fixes commit f4c142bb9fe6b02c0af8cfca8a920091e2dba44b + ("arm: Use _dl_find_object on __gnu_Unwind_Find_exidx (BZ 31405)"). + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 96429bcc91a14f71b177ddc5e716de3069060f2c) + +diff --git a/elf/Makefile b/elf/Makefile +index f5646c434f19a667..92da608da1ebc175 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -34,7 +34,6 @@ routines = \ + dl-addr \ + dl-addr-obj \ + dl-early_allocate \ +- dl-find_object \ + dl-iteratephdr \ + dl-libc \ + dl-origin \ +@@ -61,6 +60,7 @@ dl-routines = \ + dl-deps \ + dl-exception \ + dl-execstack \ ++ dl-find_object \ + dl-fini \ + dl-init \ + dl-load \ +diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c +index 449302eda35ce96f..940fa5c2236af666 100644 +--- a/elf/dl-find_object.c ++++ b/elf/dl-find_object.c +@@ -356,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size) + } + + int +-__dl_find_object (void *pc1, struct dl_find_object *result) ++_dl_find_object (void *pc1, struct dl_find_object *result) + { + uintptr_t pc = (uintptr_t) pc1; + +@@ -463,8 +463,7 @@ __dl_find_object (void *pc1, struct dl_find_object *result) + return -1; + } /* Transaction retry loop. */ + } +-hidden_def (__dl_find_object) +-weak_alias (__dl_find_object, _dl_find_object) ++rtld_hidden_def (_dl_find_object) + + /* _dlfo_process_initial is called twice. First to compute the array + sizes from the initial loaded mappings. Second to fill in the +diff --git a/include/dlfcn.h b/include/dlfcn.h +index f49ee1b0c9958d38..a44420fa37439a85 100644 +--- a/include/dlfcn.h ++++ b/include/dlfcn.h +@@ -4,8 +4,7 @@ + #include /* For ElfW. */ + #include + +-extern __typeof (_dl_find_object) __dl_find_object; +-hidden_proto (__dl_find_object) ++rtld_hidden_proto (_dl_find_object) + + /* Internally used flag. */ + #define __RTLD_DLOPEN 0x80000000 +diff --git a/sysdeps/arm/find_exidx.c b/sysdeps/arm/find_exidx.c +index a924d59b9f75d7b2..4257c268381df540 100644 +--- a/sysdeps/arm/find_exidx.c ++++ b/sysdeps/arm/find_exidx.c +@@ -15,6 +15,7 @@ + License along with the GNU C Library. If not, see + . */ + ++#include + #include + + /* Find the exception index table containing PC. */ +@@ -23,7 +24,7 @@ _Unwind_Ptr + __gnu_Unwind_Find_exidx (_Unwind_Ptr pc, int * pcount) + { + struct dl_find_object data; +- if (__dl_find_object ((void *) pc, &data) < 0) ++ if (GLRO(dl_find_object) ((void *) pc, &data) < 0) + return 0; + *pcount = data.dlfo_eh_count; + return (_Unwind_Ptr) data.dlfo_eh_frame; diff --git a/glibc-upstream-2.39-218.patch b/glibc-upstream-2.39-218.patch new file mode 100644 index 0000000..0318190 --- /dev/null +++ b/glibc-upstream-2.39-218.patch @@ -0,0 +1,89 @@ +commit 64488b4b31d63b59b06b42ad93a092053364801b +Author: Florian Weimer +Date: Fri Aug 1 10:20:23 2025 +0200 + + elf: Extract rtld_setup_phdr function from dl_main + + Remove historic binutils reference from comment and update + how this data is used by applications. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 2cac9559e06044ba520e785c151fbbd25011865f) + +diff --git a/elf/rtld.c b/elf/rtld.c +index 3ca9a11009a74626..3bf9707e0007bc83 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1284,6 +1284,37 @@ rtld_setup_main_map (struct link_map *main_map) + return has_interp; + } + ++/* Set up the program header information for the dynamic linker ++ itself. It can be accessed via _r_debug and dl_iterate_phdr ++ callbacks. */ ++static void ++rtld_setup_phdr (void) ++{ ++ /* Starting from binutils-2.23, the linker will define the magic ++ symbol __ehdr_start to point to our own ELF header if it is ++ visible in a segment that also includes the phdrs. */ ++ ++ const ElfW(Ehdr) *rtld_ehdr = &__ehdr_start; ++ assert (rtld_ehdr->e_ehsize == sizeof *rtld_ehdr); ++ assert (rtld_ehdr->e_phentsize == sizeof (ElfW(Phdr))); ++ ++ const ElfW(Phdr) *rtld_phdr = (const void *) rtld_ehdr + rtld_ehdr->e_phoff; ++ ++ GL(dl_rtld_map).l_phdr = rtld_phdr; ++ GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum; ++ ++ ++ /* PT_GNU_RELRO is usually the last phdr. */ ++ size_t cnt = rtld_ehdr->e_phnum; ++ while (cnt-- > 0) ++ if (rtld_phdr[cnt].p_type == PT_GNU_RELRO) ++ { ++ GL(dl_rtld_map).l_relro_addr = rtld_phdr[cnt].p_vaddr; ++ GL(dl_rtld_map).l_relro_size = rtld_phdr[cnt].p_memsz; ++ break; ++ } ++} ++ + /* Adjusts the contents of the stack and related globals for the user + entry point. The ld.so processed skip_args arguments and bumped + _dl_argv and _dl_argc accordingly. Those arguments are removed from +@@ -1749,33 +1780,7 @@ dl_main (const ElfW(Phdr) *phdr, + ++GL(dl_ns)[LM_ID_BASE]._ns_nloaded; + ++GL(dl_load_adds); + +- /* Starting from binutils-2.23, the linker will define the magic symbol +- __ehdr_start to point to our own ELF header if it is visible in a +- segment that also includes the phdrs. If that's not available, we use +- the old method that assumes the beginning of the file is part of the +- lowest-addressed PT_LOAD segment. */ +- +- /* Set up the program header information for the dynamic linker +- itself. It is needed in the dl_iterate_phdr callbacks. */ +- const ElfW(Ehdr) *rtld_ehdr = &__ehdr_start; +- assert (rtld_ehdr->e_ehsize == sizeof *rtld_ehdr); +- assert (rtld_ehdr->e_phentsize == sizeof (ElfW(Phdr))); +- +- const ElfW(Phdr) *rtld_phdr = (const void *) rtld_ehdr + rtld_ehdr->e_phoff; +- +- GL(dl_rtld_map).l_phdr = rtld_phdr; +- GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum; +- +- +- /* PT_GNU_RELRO is usually the last phdr. */ +- size_t cnt = rtld_ehdr->e_phnum; +- while (cnt-- > 0) +- if (rtld_phdr[cnt].p_type == PT_GNU_RELRO) +- { +- GL(dl_rtld_map).l_relro_addr = rtld_phdr[cnt].p_vaddr; +- GL(dl_rtld_map).l_relro_size = rtld_phdr[cnt].p_memsz; +- break; +- } ++ rtld_setup_phdr (); + + /* Add the dynamic linker to the TLS list if it also uses TLS. */ + if (GL(dl_rtld_map).l_tls_blocksize != 0) diff --git a/glibc-upstream-2.39-219.patch b/glibc-upstream-2.39-219.patch new file mode 100644 index 0000000..f2ea96e --- /dev/null +++ b/glibc-upstream-2.39-219.patch @@ -0,0 +1,448 @@ +commit 49f0e73fa3279465f4c9d86a286c3812cc377061 +Author: Florian Weimer +Date: Fri Aug 1 12:19:49 2025 +0200 + + elf: Handle ld.so with LOAD segment gaps in _dl_find_object (bug 31943) + + Detect if ld.so not contiguous and handle that case in _dl_find_object. + Set l_find_object_processed even for initially loaded link maps, + otherwise dlopen of an initially loaded object adds it to + _dlfo_loaded_mappings (where maps are expected to be contiguous), + in addition to _dlfo_nodelete_mappings. + + Test elf/tst-link-map-contiguous-ldso iterates over the loader + image, reading every word to make sure memory is actually mapped. + It only does that if the l_contiguous flag is set for the link map. + Otherwise, it finds gaps with mmap and checks that _dl_find_object + does not return the ld.so mapping for them. + + The test elf/tst-link-map-contiguous-main does the same thing for + the libc.so shared object. This only works if the kernel loaded + the main program because the glibc dynamic loader may fill + the gaps with PROT_NONE mappings in some cases, making it contiguous, + but accesses to individual words may still fault. + + Test elf/tst-link-map-contiguous-libc is again slightly different + because the dynamic loader always fills the gaps with PROT_NONE + mappings, so a different form of probing has to be used. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 20681be149b9eb1b6c1f4246bf4bd801221c86cd) + +diff --git a/elf/Makefile b/elf/Makefile +index 92da608da1ebc175..3085a0844c6604fe 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -514,6 +514,8 @@ tests-internal += \ + tst-dl_find_object \ + tst-dl_find_object-threads \ + tst-dlmopen2 \ ++ tst-link-map-contiguous-ldso \ ++ tst-link-map-contiguous-libc \ + tst-ptrguard1 \ + tst-stackguard1 \ + tst-tls-surplus \ +@@ -525,6 +527,10 @@ tests-internal += \ + unload2 \ + # tests-internal + ++ifeq ($(build-hardcoded-path-in-tests),yes) ++tests-internal += tst-link-map-contiguous-main ++endif ++ + tests-container += \ + tst-dlopen-self-container \ + tst-dlopen-tlsmodid-container \ +diff --git a/elf/dl-find_object.c b/elf/dl-find_object.c +index 940fa5c2236af666..0e45f0af32c9e6b4 100644 +--- a/elf/dl-find_object.c ++++ b/elf/dl-find_object.c +@@ -465,6 +465,37 @@ _dl_find_object (void *pc1, struct dl_find_object *result) + } + rtld_hidden_def (_dl_find_object) + ++/* Subroutine of _dlfo_process_initial to split out noncontigous link ++ maps. NODELETE is the number of used _dlfo_nodelete_mappings ++ elements. It is incremented as needed, and the new NODELETE value ++ is returned. */ ++static size_t ++_dlfo_process_initial_noncontiguous_map (struct link_map *map, ++ size_t nodelete) ++{ ++ struct dl_find_object_internal dlfo; ++ _dl_find_object_from_map (map, &dlfo); ++ ++ /* PT_LOAD segments for a non-contiguous link map are added to the ++ non-closeable mappings. */ ++ const ElfW(Phdr) *ph = map->l_phdr; ++ const ElfW(Phdr) *ph_end = map->l_phdr + map->l_phnum; ++ for (; ph < ph_end; ++ph) ++ if (ph->p_type == PT_LOAD) ++ { ++ if (_dlfo_nodelete_mappings != NULL) ++ { ++ /* Second pass only. */ ++ _dlfo_nodelete_mappings[nodelete] = dlfo; ++ ElfW(Addr) start = ph->p_vaddr + map->l_addr; ++ _dlfo_nodelete_mappings[nodelete].map_start = start; ++ _dlfo_nodelete_mappings[nodelete].map_end = start + ph->p_memsz; ++ } ++ ++nodelete; ++ } ++ return nodelete; ++} ++ + /* _dlfo_process_initial is called twice. First to compute the array + sizes from the initial loaded mappings. Second to fill in the + bases and infos arrays with the (still unsorted) data. Returns the +@@ -476,29 +507,8 @@ _dlfo_process_initial (void) + + size_t nodelete = 0; + if (!main_map->l_contiguous) +- { +- struct dl_find_object_internal dlfo; +- _dl_find_object_from_map (main_map, &dlfo); +- +- /* PT_LOAD segments for a non-contiguous are added to the +- non-closeable mappings. */ +- for (const ElfW(Phdr) *ph = main_map->l_phdr, +- *ph_end = main_map->l_phdr + main_map->l_phnum; +- ph < ph_end; ++ph) +- if (ph->p_type == PT_LOAD) +- { +- if (_dlfo_nodelete_mappings != NULL) +- { +- /* Second pass only. */ +- _dlfo_nodelete_mappings[nodelete] = dlfo; +- _dlfo_nodelete_mappings[nodelete].map_start +- = ph->p_vaddr + main_map->l_addr; +- _dlfo_nodelete_mappings[nodelete].map_end +- = _dlfo_nodelete_mappings[nodelete].map_start + ph->p_memsz; +- } +- ++nodelete; +- } +- } ++ /* Contiguous case already handled in _dl_find_object_init. */ ++ nodelete = _dlfo_process_initial_noncontiguous_map (main_map, nodelete); + + size_t loaded = 0; + for (Lmid_t ns = 0; ns < GL(dl_nns); ++ns) +@@ -510,11 +520,22 @@ _dlfo_process_initial (void) + /* lt_library link maps are implicitly NODELETE. */ + if (l->l_type == lt_library || l->l_nodelete_active) + { +- if (_dlfo_nodelete_mappings != NULL) +- /* Second pass only. */ +- _dl_find_object_from_map +- (l, _dlfo_nodelete_mappings + nodelete); +- ++nodelete; ++ /* The kernel may have loaded ld.so with gaps. */ ++ if (!l->l_contiguous ++#ifdef SHARED ++ && l == &GL(dl_rtld_map) ++#endif ++ ) ++ nodelete ++ = _dlfo_process_initial_noncontiguous_map (l, nodelete); ++ else ++ { ++ if (_dlfo_nodelete_mappings != NULL) ++ /* Second pass only. */ ++ _dl_find_object_from_map ++ (l, _dlfo_nodelete_mappings + nodelete); ++ ++nodelete; ++ } + } + else if (l->l_type == lt_loaded) + { +@@ -756,7 +777,6 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count) + /* Prefer newly loaded link map. */ + assert (loaded_index1 > 0); + _dl_find_object_from_map (loaded[loaded_index1 - 1], dlfo); +- loaded[loaded_index1 - 1]->l_find_object_processed = 1; + --loaded_index1; + } + +diff --git a/elf/dl-find_object.h b/elf/dl-find_object.h +index 0915065be065504d..8894c6657c9dc309 100644 +--- a/elf/dl-find_object.h ++++ b/elf/dl-find_object.h +@@ -87,7 +87,7 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal, + } + + /* Extract the object location data from a link map and writes it to +- *RESULT using relaxed MO stores. */ ++ *RESULT using relaxed MO stores. Set L->l_find_object_processed. */ + static void __attribute__ ((unused)) + _dl_find_object_from_map (struct link_map *l, + struct dl_find_object_internal *result) +@@ -100,6 +100,8 @@ _dl_find_object_from_map (struct link_map *l, + atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]); + #endif + ++ l->l_find_object_processed = 1; ++ + for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum; + ph < ph_end; ++ph) + if (ph->p_type == DLFO_EH_SEGMENT_TYPE) +diff --git a/elf/rtld.c b/elf/rtld.c +index 3bf9707e0007bc83..4760633866cf9159 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1286,7 +1286,7 @@ rtld_setup_main_map (struct link_map *main_map) + + /* Set up the program header information for the dynamic linker + itself. It can be accessed via _r_debug and dl_iterate_phdr +- callbacks. */ ++ callbacks, and it is used by _dl_find_object. */ + static void + rtld_setup_phdr (void) + { +@@ -1304,6 +1304,29 @@ rtld_setup_phdr (void) + GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum; + + ++ GL(dl_rtld_map).l_contiguous = 1; ++ /* The linker may not have produced a contiguous object. The kernel ++ will load the object with actual gaps (unlike the glibc loader ++ for shared objects, which always produces a contiguous mapping). ++ See similar logic in rtld_setup_main_map above. */ ++ { ++ ElfW(Addr) expected_load_address = 0; ++ for (const ElfW(Phdr) *ph = rtld_phdr; ph < &rtld_phdr[rtld_ehdr->e_phnum]; ++ ++ph) ++ if (ph->p_type == PT_LOAD) ++ { ++ ElfW(Addr) mapstart = ph->p_vaddr & ~(GLRO(dl_pagesize) - 1); ++ if (GL(dl_rtld_map).l_contiguous && expected_load_address != 0 ++ && expected_load_address != mapstart) ++ GL(dl_rtld_map).l_contiguous = 0; ++ ElfW(Addr) allocend = ph->p_vaddr + ph->p_memsz; ++ /* The next expected address is the page following this load ++ segment. */ ++ expected_load_address = ((allocend + GLRO(dl_pagesize) - 1) ++ & ~(GLRO(dl_pagesize) - 1)); ++ } ++ } ++ + /* PT_GNU_RELRO is usually the last phdr. */ + size_t cnt = rtld_ehdr->e_phnum; + while (cnt-- > 0) +diff --git a/elf/tst-link-map-contiguous-ldso.c b/elf/tst-link-map-contiguous-ldso.c +new file mode 100644 +index 0000000000000000..04de808bb234fe38 +--- /dev/null ++++ b/elf/tst-link-map-contiguous-ldso.c +@@ -0,0 +1,98 @@ ++/* Check that _dl_find_object behavior matches up with gaps. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ struct link_map *l = xdlopen (LD_SO, RTLD_NOW); ++ if (!l->l_contiguous) ++ { ++ puts ("info: ld.so link map is not contiguous"); ++ ++ /* Try to find holes by probing with mmap. */ ++ int pagesize = getpagesize (); ++ bool gap_found = false; ++ ElfW(Addr) addr = l->l_map_start; ++ TEST_COMPARE (addr % pagesize, 0); ++ while (addr < l->l_map_end) ++ { ++ void *expected = (void *) addr; ++ void *ptr = xmmap (expected, 1, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1); ++ struct dl_find_object dlfo; ++ int dlfo_ret = _dl_find_object (expected, &dlfo); ++ if (ptr == expected) ++ { ++ if (dlfo_ret < 0) ++ { ++ TEST_COMPARE (dlfo_ret, -1); ++ printf ("info: hole without mapping data found at %p\n", ptr); ++ } ++ else ++ FAIL ("object \"%s\" found in gap at %p", ++ dlfo.dlfo_link_map->l_name, ptr); ++ gap_found = true; ++ } ++ else if (dlfo_ret == 0) ++ { ++ if ((void *) dlfo.dlfo_link_map != (void *) l) ++ { ++ printf ("info: object \"%s\" found at %p\n", ++ dlfo.dlfo_link_map->l_name, ptr); ++ gap_found = true; ++ } ++ } ++ else ++ TEST_COMPARE (dlfo_ret, -1); ++ xmunmap (ptr, 1); ++ addr += pagesize; ++ } ++ if (!gap_found) ++ FAIL ("no ld.so gap found"); ++ } ++ else ++ { ++ puts ("info: ld.so link map is contiguous"); ++ ++ /* Assert that ld.so is truly contiguous in memory. */ ++ volatile long int *p = (volatile long int *) l->l_map_start; ++ volatile long int *end = (volatile long int *) l->l_map_end; ++ while (p < end) ++ { ++ *p; ++ ++p; ++ } ++ } ++ ++ xdlclose (l); ++ ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-link-map-contiguous-libc.c b/elf/tst-link-map-contiguous-libc.c +new file mode 100644 +index 0000000000000000..eb5728c765ac3cfb +--- /dev/null ++++ b/elf/tst-link-map-contiguous-libc.c +@@ -0,0 +1,57 @@ ++/* Check that the entire libc.so program image is readable if contiguous. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ struct link_map *l = xdlopen (LIBC_SO, RTLD_NOW); ++ ++ /* The dynamic loader fills holes with PROT_NONE mappings. */ ++ if (!l->l_contiguous) ++ FAIL_EXIT1 ("libc.so link map is not contiguous"); ++ ++ /* Direct probing does not work because not everything is readable ++ due to PROT_NONE mappings. */ ++ int pagesize = getpagesize (); ++ ElfW(Addr) addr = l->l_map_start; ++ TEST_COMPARE (addr % pagesize, 0); ++ while (addr < l->l_map_end) ++ { ++ void *expected = (void *) addr; ++ void *ptr = xmmap (expected, 1, PROT_READ | PROT_WRITE, ++ MAP_PRIVATE | MAP_ANONYMOUS, -1); ++ if (ptr == expected) ++ FAIL ("hole in libc.so memory image after %lu bytes", ++ (unsigned long int) (addr - l->l_map_start)); ++ xmunmap (ptr, 1); ++ addr += pagesize; ++ } ++ ++ xdlclose (l); ++ ++ return 0; ++} ++#include +diff --git a/elf/tst-link-map-contiguous-main.c b/elf/tst-link-map-contiguous-main.c +new file mode 100644 +index 0000000000000000..2d1a054f0fbb0855 +--- /dev/null ++++ b/elf/tst-link-map-contiguous-main.c +@@ -0,0 +1,45 @@ ++/* Check that the entire main program image is readable if contiguous. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ struct link_map *l = xdlopen ("", RTLD_NOW); ++ if (!l->l_contiguous) ++ FAIL_UNSUPPORTED ("main link map is not contiguous"); ++ ++ /* This check only works if the kernel loaded the main program. The ++ dynamic loader replaces gaps with PROT_NONE mappings, resulting ++ in faults. */ ++ volatile long int *p = (volatile long int *) l->l_map_start; ++ volatile long int *end = (volatile long int *) l->l_map_end; ++ while (p < end) ++ { ++ *p; ++ ++p; ++ } ++ ++ xdlclose (l); ++ ++ return 0; ++} ++#include diff --git a/glibc-upstream-2.39-220.patch b/glibc-upstream-2.39-220.patch new file mode 100644 index 0000000..d4cd8f9 --- /dev/null +++ b/glibc-upstream-2.39-220.patch @@ -0,0 +1,41 @@ +commit fca59375106e798911f3793768e94ee114542e3e +Author: Carlos O'Donell +Date: Thu Jun 8 06:43:44 2023 -0400 + + ctype: Reformat Makefile. + + Reflow and sort Makefile. + + Code generation changes present due to link order changes. + + No regressions on x86_64 and i686. + + (cherry picked from commit 12956e0a330e3d90fc196f7d7a047ce613f78920) + +diff --git a/ctype/Makefile b/ctype/Makefile +index 717d02012942e0b9..3e09938bd1bb1522 100644 +--- a/ctype/Makefile ++++ b/ctype/Makefile +@@ -24,9 +24,18 @@ include ../Makeconfig + + headers := ctype.h + +-routines := ctype ctype-c99 ctype-extn ctype-c99_l ctype_l isctype +-aux := ctype-info +- +-tests := test_ctype ++routines := \ ++ ctype \ ++ ctype-c99 \ ++ ctype-c99_l \ ++ ctype-extn \ ++ ctype_l \ ++ isctype \ ++ # routines ++aux := ctype-info ++ ++tests := \ ++ test_ctype \ ++ # tests + + include ../Rules diff --git a/glibc-upstream-2.39-221.patch b/glibc-upstream-2.39-221.patch new file mode 100644 index 0000000..e1b3b2e --- /dev/null +++ b/glibc-upstream-2.39-221.patch @@ -0,0 +1,296 @@ +commit fbdf9680cc67d5646607c3d6fdc146fedc383a2a +Author: Florian Weimer +Date: Fri May 16 19:53:09 2025 +0200 + + Remove + + Use __thread variables directly instead. The macros do not save any + typing. It seems unlikely that a future port will lack __thread + variable support. + + Some of the __libc_tsd_* variables are referenced from assembler + files, so keep their names. Previously, included + , which in turn included , so a few direct includes + of are now required. + + Reviewed-by: Frédéric Bérat + (cherry picked from commit 10a66a8e421b09682b774c795ef1da402235dddc) + +diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c +index 9032547567b2c348..71d1c8e3b4660d54 100644 +--- a/ctype/ctype-info.c ++++ b/ctype/ctype-info.c +@@ -19,20 +19,20 @@ + #include + #include + +-__libc_tsd_define (, const uint16_t *, CTYPE_B) +-__libc_tsd_define (, const int32_t *, CTYPE_TOLOWER) +-__libc_tsd_define (, const int32_t *, CTYPE_TOUPPER) ++__thread const uint16_t * __libc_tsd_CTYPE_B; ++__thread const int32_t * __libc_tsd_CTYPE_TOLOWER; ++__thread const int32_t * __libc_tsd_CTYPE_TOUPPER; + + + void + __ctype_init (void) + { +- const uint16_t **bp = __libc_tsd_address (const uint16_t *, CTYPE_B); +- *bp = (const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS) + 128; +- const int32_t **up = __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); +- *up = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER) + 128); +- const int32_t **lp = __libc_tsd_address (const int32_t *, CTYPE_TOLOWER); +- *lp = ((int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER) + 128); ++ __libc_tsd_CTYPE_B ++ = ((const uint16_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_CLASS)) + 128; ++ __libc_tsd_CTYPE_TOUPPER ++ = ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOUPPER)) + 128; ++ __libc_tsd_CTYPE_TOLOWER = ++ ((const int32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TOLOWER)) + 128; + } + libc_hidden_def (__ctype_init) + +diff --git a/include/ctype.h b/include/ctype.h +index 493a6f80ce8e8b8e..e993adc86da43b7c 100644 +--- a/include/ctype.h ++++ b/include/ctype.h +@@ -24,33 +24,32 @@ libc_hidden_proto (toupper) + NL_CURRENT_INDIRECT. */ + + # include "../locale/localeinfo.h" +-# include + + # ifndef CTYPE_EXTERN_INLINE /* Used by ctype/ctype-info.c, which see. */ + # define CTYPE_EXTERN_INLINE extern inline + # endif + +-__libc_tsd_define (extern, const uint16_t *, CTYPE_B) +-__libc_tsd_define (extern, const int32_t *, CTYPE_TOUPPER) +-__libc_tsd_define (extern, const int32_t *, CTYPE_TOLOWER) ++extern __thread const uint16_t * __libc_tsd_CTYPE_B; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER; + + + CTYPE_EXTERN_INLINE const uint16_t ** __attribute__ ((const)) + __ctype_b_loc (void) + { +- return __libc_tsd_address (const uint16_t *, CTYPE_B); ++ return &__libc_tsd_CTYPE_B; + } + + CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) + __ctype_toupper_loc (void) + { +- return __libc_tsd_address (const int32_t *, CTYPE_TOUPPER); ++ return &__libc_tsd_CTYPE_TOUPPER; + } + + CTYPE_EXTERN_INLINE const int32_t ** __attribute__ ((const)) + __ctype_tolower_loc (void) + { +- return __libc_tsd_address (const int32_t *, CTYPE_TOLOWER); ++ return &__libc_tsd_CTYPE_TOLOWER; + } + + # ifndef __NO_CTYPE +diff --git a/include/rpc/rpc.h b/include/rpc/rpc.h +index f5cee6caef6284d2..936ea3cebb8101e1 100644 +--- a/include/rpc/rpc.h ++++ b/include/rpc/rpc.h +@@ -3,8 +3,6 @@ + + # ifndef _ISOMAC + +-#include +- + /* Now define the internal interfaces. */ + extern unsigned long _create_xid (void); + +@@ -47,7 +45,7 @@ extern void __rpc_thread_key_cleanup (void) attribute_hidden; + + extern void __rpc_thread_destroy (void) attribute_hidden; + +-__libc_tsd_define (extern, struct rpc_thread_variables *, RPC_VARS) ++extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS; + + #define RPC_THREAD_VARIABLE(x) (__rpc_thread_variables()->x) + +diff --git a/locale/lc-ctype.c b/locale/lc-ctype.c +index c77ec51cb89b839d..70556acaf0dc69bb 100644 +--- a/locale/lc-ctype.c ++++ b/locale/lc-ctype.c +@@ -64,12 +64,9 @@ _nl_postload_ctype (void) + in fact using the global locale. */ + if (_NL_CURRENT_LOCALE == &_nl_global_locale) + { +- __libc_tsd_set (const uint16_t *, CTYPE_B, +- (void *) _nl_global_locale.__ctype_b); +- __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, +- (void *) _nl_global_locale.__ctype_toupper); +- __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, +- (void *) _nl_global_locale.__ctype_tolower); ++ __libc_tsd_CTYPE_B = _nl_global_locale.__ctype_b; ++ __libc_tsd_CTYPE_TOUPPER = _nl_global_locale.__ctype_toupper; ++ __libc_tsd_CTYPE_TOLOWER = _nl_global_locale.__ctype_tolower; + } + + #include +diff --git a/locale/localeinfo.h b/locale/localeinfo.h +index ed698faef1b38003..bc8e92e4dca80d62 100644 +--- a/locale/localeinfo.h ++++ b/locale/localeinfo.h +@@ -236,10 +236,8 @@ extern struct __locale_struct _nl_global_locale attribute_hidden; + + /* This fetches the thread-local locale_t pointer, either one set with + uselocale or &_nl_global_locale. */ +-#define _NL_CURRENT_LOCALE (__libc_tsd_get (locale_t, LOCALE)) +-#include +-__libc_tsd_define (extern, locale_t, LOCALE) +- ++#define _NL_CURRENT_LOCALE __libc_tsd_LOCALE ++extern __thread locale_t __libc_tsd_LOCALE; + + /* For static linking it is desireable to avoid always linking in the code + and data for every category when we can tell at link time that they are +diff --git a/locale/uselocale.c b/locale/uselocale.c +index 8136caf61b4673fb..0b247a77d5f47f81 100644 +--- a/locale/uselocale.c ++++ b/locale/uselocale.c +@@ -34,7 +34,7 @@ __uselocale (locale_t newloc) + { + const locale_t locobj + = newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc; +- __libc_tsd_set (locale_t, LOCALE, locobj); ++ __libc_tsd_LOCALE = locobj; + + #ifdef NL_CURRENT_INDIRECT + /* Now we must update all the per-category thread-local variables to +@@ -62,11 +62,9 @@ __uselocale (locale_t newloc) + #endif + + /* Update the special tsd cache of some locale data. */ +- __libc_tsd_set (const uint16_t *, CTYPE_B, (void *) locobj->__ctype_b); +- __libc_tsd_set (const int32_t *, CTYPE_TOLOWER, +- (void *) locobj->__ctype_tolower); +- __libc_tsd_set (const int32_t *, CTYPE_TOUPPER, +- (void *) locobj->__ctype_toupper); ++ __libc_tsd_CTYPE_B = locobj->__ctype_b; ++ __libc_tsd_CTYPE_TOLOWER = locobj->__ctype_tolower; ++ __libc_tsd_CTYPE_TOUPPER = locobj->__ctype_toupper; + } + + return oldloc == &_nl_global_locale ? LC_GLOBAL_LOCALE : oldloc; +diff --git a/stdio-common/printf-parsemb.c b/stdio-common/printf-parsemb.c +index ab9fafb5ecb12f16..8db18f11b32c9433 100644 +--- a/stdio-common/printf-parsemb.c ++++ b/stdio-common/printf-parsemb.c +@@ -17,6 +17,7 @@ + . */ + + #include ++#include + #include + #include + #include +diff --git a/string/strerror.c b/string/strerror.c +index 107d9d39c287bed4..efa4e903ead00a47 100644 +--- a/string/strerror.c ++++ b/string/strerror.c +@@ -21,5 +21,5 @@ + char * + strerror (int errnum) + { +- return __strerror_l (errnum, __libc_tsd_get (locale_t, LOCALE)); ++ return __strerror_l (errnum, __libc_tsd_LOCALE); + } +diff --git a/sunrpc/rpc_thread.c b/sunrpc/rpc_thread.c +index a04b7ec47fa4760c..e20f0a62302eb675 100644 +--- a/sunrpc/rpc_thread.c ++++ b/sunrpc/rpc_thread.c +@@ -3,7 +3,6 @@ + #include + + #include +-#include + #include + #include + +diff --git a/sysdeps/generic/libc-tsd.h b/sysdeps/generic/libc-tsd.h +deleted file mode 100644 +index ac0e99e14b1bcb90..0000000000000000 +--- a/sysdeps/generic/libc-tsd.h ++++ /dev/null +@@ -1,60 +0,0 @@ +-/* libc-internal interface for thread-specific data. Stub or TLS version. +- Copyright (C) 1998-2024 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#ifndef _GENERIC_LIBC_TSD_H +-#define _GENERIC_LIBC_TSD_H 1 +- +-/* This file defines the following macros for accessing a small fixed +- set of thread-specific `void *' data used only internally by libc. +- +- __libc_tsd_define(CLASS, TYPE, KEY) -- Define or declare a datum with TYPE +- for KEY. CLASS can be `static' for +- keys used in only one source file, +- empty for global definitions, or +- `extern' for global declarations. +- __libc_tsd_address(TYPE, KEY) -- Return the `TYPE *' pointing to +- the current thread's datum for KEY. +- __libc_tsd_get(TYPE, KEY) -- Return the `TYPE' datum for KEY. +- __libc_tsd_set(TYPE, KEY, VALUE) -- Set the datum for KEY to VALUE. +- +- The set of available KEY's will usually be provided as an enum, +- and contains (at least): +- _LIBC_TSD_KEY_MALLOC +- _LIBC_TSD_KEY_DL_ERROR +- _LIBC_TSD_KEY_RPC_VARS +- All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros. +- Some implementations may not provide any enum at all and instead +- using string pasting in the macros. */ +- +-#include +- +-/* When full support for __thread variables is available, this interface is +- just a trivial wrapper for it. Without TLS, this is the generic/stub +- implementation for wholly single-threaded systems. +- +- We don't define an enum for the possible key values, because the KEYs +- translate directly into variables by macro magic. */ +- +-#define __libc_tsd_define(CLASS, TYPE, KEY) \ +- CLASS __thread TYPE __libc_tsd_##KEY attribute_tls_model_ie; +- +-#define __libc_tsd_address(TYPE, KEY) (&__libc_tsd_##KEY) +-#define __libc_tsd_get(TYPE, KEY) (__libc_tsd_##KEY) +-#define __libc_tsd_set(TYPE, KEY, VALUE) (__libc_tsd_##KEY = (VALUE)) +- +-#endif /* libc-tsd.h */ +diff --git a/time/strftime_l.c b/time/strftime_l.c +index 77adec905007d53a..066c839c2feccdc1 100644 +--- a/time/strftime_l.c ++++ b/time/strftime_l.c +@@ -40,6 +40,7 @@ + #endif + + #include ++#include + #include /* Some systems define `time_t' here. */ + + #ifdef TIME_WITH_SYS_TIME diff --git a/glibc-upstream-2.39-222.patch b/glibc-upstream-2.39-222.patch new file mode 100644 index 0000000..ad625e7 --- /dev/null +++ b/glibc-upstream-2.39-222.patch @@ -0,0 +1,67 @@ +commit 25c537c3b3933663642874e332c73c6f65e3ddea +Author: Florian Weimer +Date: Fri May 16 19:53:09 2025 +0200 + + Use proper extern declaration for _nl_C_LC_CTYPE_{class,toupper,tolower} + + The existing initializers already contain explicit casts. Keep them + due to int/uint32_t mismatch. + + Reviewed-by: Frédéric Bérat + (cherry picked from commit e0c0f856f58ceb68800a964c36c15c606e7a8c4c) + +diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c +index 71d1c8e3b4660d54..94e312d91ff76323 100644 +--- a/ctype/ctype-info.c ++++ b/ctype/ctype-info.c +@@ -41,10 +41,7 @@ libc_hidden_def (__ctype_init) + #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) + + /* Defined in locale/C-ctype.c. */ +-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class32[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_upper[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_lower[] attribute_hidden; + extern const char _nl_C_LC_CTYPE_class_alpha[] attribute_hidden; +diff --git a/include/ctype.h b/include/ctype.h +index e993adc86da43b7c..ae078a63d355af61 100644 +--- a/include/ctype.h ++++ b/include/ctype.h +@@ -63,6 +63,11 @@ __ctype_tolower_loc (void) + # define __isdigit_l(c, l) ({ int __c = (c); __c >= '0' && __c <= '9'; }) + # endif /* Not __NO_CTYPE. */ + ++/* For use in initializers. */ ++extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; ++extern const uint32_t _nl_C_LC_CTYPE_toupper[] attribute_hidden; ++extern const uint32_t _nl_C_LC_CTYPE_tolower[] attribute_hidden; ++ + # endif /* IS_IN (libc). */ + #endif /* Not _ISOMAC. */ + +diff --git a/locale/xlocale.c b/locale/xlocale.c +index f2b9d03303e6653d..d11c1cbf8c65ad54 100644 +--- a/locale/xlocale.c ++++ b/locale/xlocale.c +@@ -18,18 +18,13 @@ + + #include + #include "localeinfo.h" ++#include + + #define DEFINE_CATEGORY(category, category_name, items, a) \ + extern struct __locale_data _nl_C_##category; + #include "categories.def" + #undef DEFINE_CATEGORY + +-/* Defined in locale/C-ctype.c. */ +-extern const char _nl_C_LC_CTYPE_class[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_toupper[] attribute_hidden; +-extern const char _nl_C_LC_CTYPE_tolower[] attribute_hidden; +- +- + const struct __locale_struct _nl_C_locobj attribute_hidden = + { + .__locales = diff --git a/glibc-upstream-2.39-223.patch b/glibc-upstream-2.39-223.patch new file mode 100644 index 0000000..7cbb71b --- /dev/null +++ b/glibc-upstream-2.39-223.patch @@ -0,0 +1,198 @@ +commit c11950503fbb8b5885a0400d1a7ba83a80878e53 +Author: Florian Weimer +Date: Fri May 16 19:53:09 2025 +0200 + + ctype: Fallback initialization of TLS using relocations (bug 19341, bug 32483) + + This ensures that the ctype data pointers in TLS are valid + in secondary namespaces even without initialization via + __ctype_init. + + Reviewed-by: Frédéric Bérat + (cherry picked from commit 2745db8dd3ec31045acd761b612516490085bc20) + +diff --git a/ctype/Makefile b/ctype/Makefile +index 3e09938bd1bb1522..b7cd5f2282b4511c 100644 +--- a/ctype/Makefile ++++ b/ctype/Makefile +@@ -36,6 +36,23 @@ aux := ctype-info + + tests := \ + test_ctype \ ++ tst-ctype-tls-dlmopen \ ++ tst-ctype-tls-dlopen-static \ + # tests + ++tests-static := \ ++ tst-ctype-tls-dlopen-static \ ++ # tests-static ++ ++modules-names := \ ++ tst-ctype-tls-mod \ ++ # modules-names ++ + include ../Rules ++ ++$(objpfx)tst-ctype-tls-dlmopen: $(shared-thread-library) ++$(objpfx)tst-ctype-tls-dlmopen.out: $(objpfx)tst-ctype-tls-mod.so ++$(objpfx)tst-ctype-tls-dlopen-static: $(static-thread-library) ++$(objpfx)tst-ctype-tls-dlopen-static.out: $(objpfx)tst-ctype-tls-mod.so ++tst-ctype-tls-dlopen-static-ENV = \ ++ LD_LIBRARY_PATH=$(ld-library-path):$(common-objpfx):$(common-objpfx)elf +diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c +index 94e312d91ff76323..621bd4239c8b4c21 100644 +--- a/ctype/ctype-info.c ++++ b/ctype/ctype-info.c +@@ -19,9 +19,17 @@ + #include + #include + +-__thread const uint16_t * __libc_tsd_CTYPE_B; +-__thread const int32_t * __libc_tsd_CTYPE_TOLOWER; +-__thread const int32_t * __libc_tsd_CTYPE_TOUPPER; ++/* Fallback initialization using relocations. See the _nl_C_locobj ++ initializers in locale/xlocale.c. Usually, this is overwritten by ++ __ctype_init before user code runs, but this does not happen for ++ threads in secondary namespaces. With the initializers, secondary ++ namespaces at least get locale data from the C locale. */ ++__thread const uint16_t * __libc_tsd_CTYPE_B ++ = (const uint16_t *) _nl_C_LC_CTYPE_class + 128; ++__thread const int32_t * __libc_tsd_CTYPE_TOLOWER ++ = (const int32_t *) _nl_C_LC_CTYPE_tolower + 128; ++__thread const int32_t * __libc_tsd_CTYPE_TOUPPER ++ = (const int32_t *) _nl_C_LC_CTYPE_toupper + 128; + + + void +diff --git a/ctype/tst-ctype-tls-dlmopen.c b/ctype/tst-ctype-tls-dlmopen.c +new file mode 100644 +index 0000000000000000..f7eeb65551344b72 +--- /dev/null ++++ b/ctype/tst-ctype-tls-dlmopen.c +@@ -0,0 +1,2 @@ ++#define DO_STATIC_TEST 0 ++#include "tst-ctype-tls-skeleton.c" +diff --git a/ctype/tst-ctype-tls-dlopen-static.c b/ctype/tst-ctype-tls-dlopen-static.c +new file mode 100644 +index 0000000000000000..c2c09c362cc95906 +--- /dev/null ++++ b/ctype/tst-ctype-tls-dlopen-static.c +@@ -0,0 +1,2 @@ ++#define DO_STATIC_TEST 1 ++#include "tst-ctype-tls-skeleton.c" +diff --git a/ctype/tst-ctype-tls-mod.c b/ctype/tst-ctype-tls-mod.c +new file mode 100644 +index 0000000000000000..52cbb9dcb67e1800 +--- /dev/null ++++ b/ctype/tst-ctype-tls-mod.c +@@ -0,0 +1,37 @@ ++/* Wrappers for macros in a secondary namespace. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++int ++my_isalpha (int ch) ++{ ++ return isalpha (ch); ++} ++ ++int ++my_toupper (int ch) ++{ ++ return toupper (ch); ++} ++ ++int ++my_tolower (int ch) ++{ ++ return tolower (ch); ++} +diff --git a/ctype/tst-ctype-tls-skeleton.c b/ctype/tst-ctype-tls-skeleton.c +new file mode 100644 +index 0000000000000000..8c53e35899f12b8f +--- /dev/null ++++ b/ctype/tst-ctype-tls-skeleton.c +@@ -0,0 +1,67 @@ ++/* Test that in a secondary namespace works. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Before this file is included, define DO_STATIC_TEST to 0 or 1. ++ With 0, dlmopen is used for the test. With 1, dlopen is used. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static int (*my_isalpha) (int); ++static int (*my_toupper) (int); ++static int (*my_tolower) (int); ++ ++static void * ++checks (void *ignore) ++{ ++ TEST_VERIFY (my_isalpha ('a')); ++ TEST_VERIFY (!my_isalpha ('0')); ++ TEST_COMPARE (my_toupper ('a'), 'A'); ++ TEST_COMPARE (my_toupper ('A'), 'A'); ++ TEST_COMPARE (my_tolower ('a'), 'a'); ++ TEST_COMPARE (my_tolower ('A'), 'a'); ++ return NULL; ++} ++ ++static int ++do_test (void) ++{ ++ char *dso = xasprintf ("%s/ctype/tst-ctype-tls-mod.so", support_objdir_root); ++#if DO_STATIC_TEST ++ void *handle = xdlopen (dso, RTLD_LAZY); ++#else ++ void *handle = xdlmopen (LM_ID_NEWLM, dso, RTLD_LAZY); ++#endif ++ my_isalpha = xdlsym (handle, "my_isalpha"); ++ my_toupper = xdlsym (handle, "my_toupper"); ++ my_tolower = xdlsym (handle, "my_tolower"); ++ ++ checks (NULL); ++ xpthread_join (xpthread_create (NULL, checks, NULL)); ++ ++ xdlclose (handle); ++ free (dso); ++ ++ return 0; ++} ++ ++#include diff --git a/glibc-upstream-2.39-224.patch b/glibc-upstream-2.39-224.patch new file mode 100644 index 0000000..9b9edbd --- /dev/null +++ b/glibc-upstream-2.39-224.patch @@ -0,0 +1,40 @@ +commit 20d2d69a2fcb3357127bea4536c8c85b183dc3b9 +Author: Jens Remus +Date: Fri Jul 25 15:40:03 2025 +0200 + + Use TLS initial-exec model for __libc_tsd_CTYPE_* thread variables [BZ #33234] + + Commit 10a66a8e421b ("Remove ") removed the TLS initial-exec + (IE) model attribute from the __libc_tsd_CTYPE_* thread variable declarations + and definitions. Commit a894f04d8776 ("Optimize __libc_tsd_* thread + variable access") restored it on declarations. + + Restore the TLS initial-exec model attribute on __libc_tsd_CTYPE_* thread + variable definitions. + + This resolves test tst-locale1 failure on s390 32-bit, when using a + GNU linker without the fix from GNU binutils commit aefebe82dc89 + ("IBM zSystems: Fix offset relative to static TLS"). + + Reviewed-by: Florian Weimer + (cherry picked from commit e5363e6f460c2d58809bf10fc96d70fd1ef8b5b2) + +diff --git a/ctype/ctype-info.c b/ctype/ctype-info.c +index 621bd4239c8b4c21..b6cdf7eb667a7118 100644 +--- a/ctype/ctype-info.c ++++ b/ctype/ctype-info.c +@@ -24,11 +24,11 @@ + __ctype_init before user code runs, but this does not happen for + threads in secondary namespaces. With the initializers, secondary + namespaces at least get locale data from the C locale. */ +-__thread const uint16_t * __libc_tsd_CTYPE_B ++__thread const uint16_t * __libc_tsd_CTYPE_B attribute_tls_model_ie + = (const uint16_t *) _nl_C_LC_CTYPE_class + 128; +-__thread const int32_t * __libc_tsd_CTYPE_TOLOWER ++__thread const int32_t * __libc_tsd_CTYPE_TOLOWER attribute_tls_model_ie + = (const int32_t *) _nl_C_LC_CTYPE_tolower + 128; +-__thread const int32_t * __libc_tsd_CTYPE_TOUPPER ++__thread const int32_t * __libc_tsd_CTYPE_TOUPPER attribute_tls_model_ie + = (const int32_t *) _nl_C_LC_CTYPE_toupper + 128; + + diff --git a/glibc-upstream-2.39-225.patch b/glibc-upstream-2.39-225.patch new file mode 100644 index 0000000..fa133fa --- /dev/null +++ b/glibc-upstream-2.39-225.patch @@ -0,0 +1,544 @@ +commit d1c1f78e9eb9ff5e8eeae21ec9a879b7d0095c2e +Author: Joe Ramsay +Date: Fri Jan 3 19:13:36 2025 +0000 + + math: Remove no-mathvec flag + + More routines are to follow, some of which hit many failures in the + current testsuite due to wrong sign of zero (mathvec routines are not + required to get this right). Instead of disabling a large number of + tests, change the failure condition such that, for vector routines, + tests pass as long as computed == expected == 0.0, regardless of sign. + + Affected tests (vector tests for expm1, log1p, sin, tan and tanh) all + still pass. + + (cherry picked from commit 939e770e0196ebd763cacc602421b76d62df0798) + +diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in +index d728f9777015d3b9..5a690023e9a675cb 100644 +--- a/math/auto-libm-test-in ++++ b/math/auto-libm-test-in +@@ -5354,7 +5354,7 @@ exp2 -0x4.8ce878p-4 + exp2 0xf.93d18bf7be8d272p-4 + + expm1 0 +-expm1 -0 no-mathvec ++expm1 -0 + expm1 1 + expm1 0.75 + expm1 2 +@@ -5419,7 +5419,7 @@ expm1 -0x1p-100 + expm1 0x1p-600 + expm1 -0x1p-600 + expm1 0x1p-10000 +-expm1 -0x1p-10000 no-mathvec ++expm1 -0x1p-10000 + expm1 0xe.4152ac57cd1ea7ap-60 + expm1 0x6.660247486aed8p-4 + expm1 0x6.289a78p-4 +@@ -6577,7 +6577,7 @@ log10 0xf.bf1b2p-4 + log10 0x1.6b5f7ap+96 + + log1p 0 +-log1p -0 no-mathvec ++log1p -0 + log1p e-1 + log1p -0.25 + log1p -0.875 +@@ -7318,7 +7318,7 @@ pow 0x1.7ac7cp+5 23 + pow -0x1.7ac7cp+5 23 + + sin 0 +-sin -0 no-mathvec ++sin -0 + sin pi/6 + sin -pi/6 + sin pi/2 +@@ -7655,7 +7655,7 @@ sqrt min + sqrt min_subnorm + + tan 0 +-tan -0 no-mathvec ++tan -0 + tan pi/4 + tan pi/2 + tan -pi/2 +diff --git a/math/auto-libm-test-out-expm1 b/math/auto-libm-test-out-expm1 +index 91da41b7f604a5a1..8483455801221aac 100644 +--- a/math/auto-libm-test-out-expm1 ++++ b/math/auto-libm-test-out-expm1 +@@ -23,31 +23,31 @@ expm1 0 + = expm1 tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = expm1 towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = expm1 upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok +-expm1 -0 no-mathvec +-= expm1 downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok ++expm1 -0 ++= expm1 downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok + expm1 1 + = expm1 downward binary32 0x1p+0 : 0x1.b7e15p+0 : inexact-ok + = expm1 tonearest binary32 0x1p+0 : 0x1.b7e152p+0 : inexact-ok +@@ -1880,87 +1880,87 @@ expm1 0x1p-10000 + = expm1 tonearest binary128 0x1p-10000 : 0x1p-10000 : inexact-ok + = expm1 towardzero binary128 0x1p-10000 : 0x1p-10000 : inexact-ok + = expm1 upward binary128 0x1p-10000 : 0x1.0000000000000000000000000001p-10000 : inexact-ok +-expm1 -0x1p-10000 no-mathvec +-= expm1 downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= expm1 downward binary32 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 tonearest binary32 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 towardzero binary32 -0x8p-152 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 upward binary32 -0x8p-152 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 downward binary64 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 tonearest binary64 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 towardzero binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : no-mathvec inexact-ok +-= expm1 upward binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : no-mathvec inexact-ok +-= expm1 downward intel96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 tonearest intel96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 towardzero intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok +-= expm1 upward intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok +-= expm1 downward m68k96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 tonearest m68k96 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 towardzero m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok +-= expm1 upward m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : no-mathvec inexact-ok +-= expm1 downward binary128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 tonearest binary128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 towardzero binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : no-mathvec inexact-ok +-= expm1 upward binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : no-mathvec inexact-ok +-= expm1 downward ibm128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 tonearest ibm128 -0x8p-152 : -0x8p-152 : no-mathvec inexact-ok +-= expm1 towardzero ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : no-mathvec inexact-ok +-= expm1 upward ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : no-mathvec inexact-ok +-= expm1 downward binary64 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 tonearest binary64 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 towardzero binary64 -0x4p-1076 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 upward binary64 -0x4p-1076 : -0x0p+0 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 downward intel96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 tonearest intel96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 towardzero intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok +-= expm1 upward intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok +-= expm1 downward m68k96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 tonearest m68k96 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 towardzero m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok +-= expm1 upward m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : no-mathvec inexact-ok +-= expm1 downward binary128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 tonearest binary128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok +-= expm1 towardzero binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : no-mathvec inexact-ok +-= expm1 upward binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : no-mathvec inexact-ok +-= expm1 downward ibm128 -0x4p-1076 : -0x4p-1076 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok +-= expm1 tonearest ibm128 -0x4p-1076 : -0x4p-1076 : no-mathvec inexact-ok underflow errno-erange-ok +-= expm1 towardzero ibm128 -0x4p-1076 : -0x0p+0 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok +-= expm1 upward ibm128 -0x4p-1076 : -0x0p+0 : no-mathvec xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok +-= expm1 downward intel96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 tonearest intel96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 towardzero intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok +-= expm1 upward intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok +-= expm1 downward m68k96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 tonearest m68k96 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 towardzero m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok +-= expm1 upward m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : no-mathvec inexact-ok +-= expm1 downward binary128 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 tonearest binary128 -0x1p-10000 : -0x1p-10000 : no-mathvec inexact-ok +-= expm1 towardzero binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : no-mathvec inexact-ok +-= expm1 upward binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : no-mathvec inexact-ok ++expm1 -0x1p-10000 ++= expm1 downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= expm1 downward binary32 -0x8p-152 : -0x8p-152 : inexact-ok underflow errno-erange-ok ++= expm1 tonearest binary32 -0x8p-152 : -0x8p-152 : inexact-ok underflow errno-erange-ok ++= expm1 towardzero binary32 -0x8p-152 : -0x0p+0 : inexact-ok underflow errno-erange-ok ++= expm1 upward binary32 -0x8p-152 : -0x0p+0 : inexact-ok underflow errno-erange-ok ++= expm1 downward binary64 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 tonearest binary64 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 towardzero binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : inexact-ok ++= expm1 upward binary64 -0x8p-152 : -0x7.ffffffffffffcp-152 : inexact-ok ++= expm1 downward intel96 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 tonearest intel96 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 towardzero intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok ++= expm1 upward intel96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok ++= expm1 downward m68k96 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 tonearest m68k96 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 towardzero m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok ++= expm1 upward m68k96 -0x8p-152 : -0x7.fffffffffffffff8p-152 : inexact-ok ++= expm1 downward binary128 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 tonearest binary128 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 towardzero binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : inexact-ok ++= expm1 upward binary128 -0x8p-152 : -0x7.fffffffffffffffffffffffffffcp-152 : inexact-ok ++= expm1 downward ibm128 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 tonearest ibm128 -0x8p-152 : -0x8p-152 : inexact-ok ++= expm1 towardzero ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : inexact-ok ++= expm1 upward ibm128 -0x8p-152 : -0x7.fffffffffffffffffffffffffep-152 : inexact-ok ++= expm1 downward binary64 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok ++= expm1 tonearest binary64 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok ++= expm1 towardzero binary64 -0x4p-1076 : -0x0p+0 : inexact-ok underflow errno-erange-ok ++= expm1 upward binary64 -0x4p-1076 : -0x0p+0 : inexact-ok underflow errno-erange-ok ++= expm1 downward intel96 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 tonearest intel96 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 towardzero intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok ++= expm1 upward intel96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok ++= expm1 downward m68k96 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 tonearest m68k96 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 towardzero m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok ++= expm1 upward m68k96 -0x4p-1076 : -0x3.fffffffffffffffcp-1076 : inexact-ok ++= expm1 downward binary128 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 tonearest binary128 -0x4p-1076 : -0x4p-1076 : inexact-ok ++= expm1 towardzero binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : inexact-ok ++= expm1 upward binary128 -0x4p-1076 : -0x3.fffffffffffffffffffffffffffep-1076 : inexact-ok ++= expm1 downward ibm128 -0x4p-1076 : -0x4p-1076 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok ++= expm1 tonearest ibm128 -0x4p-1076 : -0x4p-1076 : inexact-ok underflow errno-erange-ok ++= expm1 towardzero ibm128 -0x4p-1076 : -0x0p+0 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok ++= expm1 upward ibm128 -0x4p-1076 : -0x0p+0 : xfail:ibm128-libgcc inexact-ok underflow errno-erange-ok ++= expm1 downward intel96 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 tonearest intel96 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 towardzero intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok ++= expm1 upward intel96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok ++= expm1 downward m68k96 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 tonearest m68k96 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 towardzero m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok ++= expm1 upward m68k96 -0x1p-10000 : -0xf.fffffffffffffffp-10004 : inexact-ok ++= expm1 downward binary128 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 tonearest binary128 -0x1p-10000 : -0x1p-10000 : inexact-ok ++= expm1 towardzero binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : inexact-ok ++= expm1 upward binary128 -0x1p-10000 : -0xf.fffffffffffffffffffffffffff8p-10004 : inexact-ok + expm1 0xe.4152ac57cd1ea7ap-60 + = expm1 downward binary32 0xe.4152bp-60 : 0xe.4152bp-60 : inexact-ok + = expm1 tonearest binary32 0xe.4152bp-60 : 0xe.4152bp-60 : inexact-ok +diff --git a/math/auto-libm-test-out-log1p b/math/auto-libm-test-out-log1p +index f83241f51ad9db8b..f7d3b35e6d4465c4 100644 +--- a/math/auto-libm-test-out-log1p ++++ b/math/auto-libm-test-out-log1p +@@ -23,31 +23,31 @@ log1p 0 + = log1p tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = log1p towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = log1p upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok +-log1p -0 no-mathvec +-= log1p downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= log1p upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok ++log1p -0 ++= log1p downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= log1p upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok + log1p e-1 + = log1p downward binary32 0x1.b7e152p+0 : 0x1p+0 : inexact-ok + = log1p tonearest binary32 0x1.b7e152p+0 : 0x1p+0 : inexact-ok +diff --git a/math/auto-libm-test-out-sin b/math/auto-libm-test-out-sin +index e1f684528316dde5..f1d21b179c955eb7 100644 +--- a/math/auto-libm-test-out-sin ++++ b/math/auto-libm-test-out-sin +@@ -23,31 +23,31 @@ sin 0 + = sin tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = sin towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = sin upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok +-sin -0 no-mathvec +-= sin downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= sin upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok ++sin -0 ++= sin downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= sin upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok + sin pi/6 + = sin downward binary32 0x8.60a92p-4 : 0x8p-4 : inexact-ok + = sin tonearest binary32 0x8.60a92p-4 : 0x8p-4 : inexact-ok +diff --git a/math/auto-libm-test-out-tan b/math/auto-libm-test-out-tan +index f46fdc7ec62075f2..7d00d03e1da81b18 100644 +--- a/math/auto-libm-test-out-tan ++++ b/math/auto-libm-test-out-tan +@@ -23,31 +23,31 @@ tan 0 + = tan tonearest ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = tan towardzero ibm128 0x0p+0 : 0x0p+0 : inexact-ok + = tan upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok +-tan -0 no-mathvec +-= tan downward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward binary32 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan downward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward binary64 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan downward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward intel96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan downward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward m68k96 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan downward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward binary128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan downward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan tonearest ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan towardzero ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok +-= tan upward ibm128 -0x0p+0 : -0x0p+0 : no-mathvec inexact-ok ++tan -0 ++= tan downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward intel96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan downward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward m68k96 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan downward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward binary128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan downward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan tonearest ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan towardzero ibm128 -0x0p+0 : -0x0p+0 : inexact-ok ++= tan upward ibm128 -0x0p+0 : -0x0p+0 : inexact-ok + tan pi/4 + = tan downward binary32 0xc.90fdbp-4 : 0x1p+0 : inexact-ok + = tan tonearest binary32 0xc.90fdbp-4 : 0x1p+0 : inexact-ok +diff --git a/math/gen-auto-libm-tests.c b/math/gen-auto-libm-tests.c +index c35242b356821eed..48227248e4aebc90 100644 +--- a/math/gen-auto-libm-tests.c ++++ b/math/gen-auto-libm-tests.c +@@ -96,8 +96,7 @@ + zero and infinite results should be ignored; "xfail" indicates the + test is disabled as expected to produce incorrect results, + "xfail-rounding" indicates the test is disabled only in rounding +- modes other than round-to-nearest; "no-mathvec" indicates the test +- is disabled in vector math libraries. Otherwise, test flags are of ++ modes other than round-to-nearest. Otherwise, test flags are of + the form "spurious-" and "missing-", for any + exception ("overflow", "underflow", "inexact", "invalid", + "divbyzero"), "spurious-errno" and "missing-errno", to indicate +@@ -353,7 +352,6 @@ typedef enum + flag_missing_overflow, + flag_missing_underflow, + flag_missing_errno, +- flag_no_mathvec, + num_input_flag_types, + flag_first_flag = 0, + flag_spurious_first = flag_spurious_divbyzero, +@@ -379,7 +377,6 @@ static const char *const input_flags[num_input_flag_types] = + "missing-overflow", + "missing-underflow", + "missing-errno", +- "no-mathvec", + }; + + /* An input flag, possibly conditional. */ +@@ -2052,7 +2049,6 @@ output_for_one_input_case (FILE *fp, const char *filename, test_function *tf, + { + case flag_ignore_zero_inf_sign: + case flag_xfail: +- case flag_no_mathvec: + if (fprintf (fp, " %s%s", + input_flags[it->flags[i].type], + (it->flags[i].cond +diff --git a/math/gen-libm-test.py b/math/gen-libm-test.py +index 397dbd325930841f..6e8bb564379e2117 100755 +--- a/math/gen-libm-test.py ++++ b/math/gen-libm-test.py +@@ -93,8 +93,7 @@ BEAUTIFY_MAP = {'minus_zero': '-0', + + # Flags in auto-libm-test-out that map directly to C flags. + FLAGS_SIMPLE = {'ignore-zero-inf-sign': 'IGNORE_ZERO_INF_SIGN', +- 'xfail': 'XFAIL_TEST', +- 'no-mathvec': 'NO_TEST_MATHVEC'} ++ 'xfail': 'XFAIL_TEST'} + + # Exceptions in auto-libm-test-out, and their corresponding C flags + # for being required, OK or required to be absent. +diff --git a/math/libm-test-support.c b/math/libm-test-support.c +index 0796f9d4956e3818..3fecd87064666f94 100644 +--- a/math/libm-test-support.c ++++ b/math/libm-test-support.c +@@ -776,7 +776,7 @@ check_float_internal (const char *test_name, FLOAT computed, FLOAT expected, + ulps = ULPDIFF (computed, expected); + set_max_error (ulps, curr_max_error); + print_diff = 1; +- if ((exceptions & IGNORE_ZERO_INF_SIGN) == 0 ++ if (((exceptions & IGNORE_ZERO_INF_SIGN) == 0) && !flag_test_mathvec + && computed == 0.0 && expected == 0.0 + && signbit(computed) != signbit (expected)) + ok = 0; diff --git a/glibc-upstream-2.39-228.patch b/glibc-upstream-2.39-228.patch new file mode 100644 index 0000000..6d2dc6d --- /dev/null +++ b/glibc-upstream-2.39-228.patch @@ -0,0 +1,64 @@ +commit 62ff85fd09ff648183af4265f07dace2879e6d42 +Author: H.J. Lu +Date: Mon Jul 28 12:18:22 2025 -0700 + + x86-64: Add GLIBC_ABI_GNU2_TLS version [BZ #33129] + + Programs and shared libraries compiled with -mtls-dialect=gnu2 may fail + silently at run-time against glibc without the GNU2 TLS run-time fix + for: + + https://sourceware.org/bugzilla/show_bug.cgi?id=31372 + + Add GLIBC_ABI_GNU2_TLS version to indicate that glibc has the working + GNU2 TLS run-time. Linker can add the GLIBC_ABI_GNU2_TLS version to + binaries which depend on the working GNU2 TLS run-time: + + https://sourceware.org/bugzilla/show_bug.cgi?id=33130 + + so that such programs and shared libraries will fail to load and run at + run-time against libc.so without the GLIBC_ABI_GNU2_TLS version, instead + of fail silently at random. + + This fixes BZ #33129. + + Signed-off-by: H.J. Lu + Reviewed-by: Sam James + (cherry picked from commit 9df8fa397d515dc86ff5565f6c45625e672d539e) + +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 08ec882159990e97..af978601657c2129 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -209,6 +209,15 @@ LDFLAGS-tst-plt-rewrite2 = -Wl,-z,now + LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs + tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so ++ ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu2-tls.out + endif + + test-internal-extras += tst-gnu2-tls2mod1 +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index e94758b23643a905..a63c11bcb25adf48 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,6 +5,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { diff --git a/glibc-upstream-2.39-229.patch b/glibc-upstream-2.39-229.patch new file mode 100644 index 0000000..914735e --- /dev/null +++ b/glibc-upstream-2.39-229.patch @@ -0,0 +1,76 @@ +commit 269e89bd8d25a0659c6c963a509e152faefd6ba2 +Author: H.J. Lu +Date: Thu Aug 14 07:03:20 2025 -0700 + + x86-64: Add GLIBC_ABI_DT_X86_64_PLT [BZ #33212] + + When the linker -z mark-plt option is used to add DT_X86_64_PLT, + DT_X86_64_PLTSZ and DT_X86_64_PLTENT, the r_addend field of the + R_X86_64_JUMP_SLOT relocation stores the offset of the indirect + branch instruction. However, glibc versions without the commit: + + commit f8587a61892cbafd98ce599131bf4f103466f084 + Author: H.J. Lu + Date: Fri May 20 19:21:48 2022 -0700 + + x86-64: Ignore r_addend for R_X86_64_GLOB_DAT/R_X86_64_JUMP_SLOT + + According to x86-64 psABI, r_addend should be ignored for R_X86_64_GLOB_DAT + and R_X86_64_JUMP_SLOT. Since linkers always set their r_addends to 0, we + can ignore their r_addends. + + Reviewed-by: Fangrui Song + + won't ignore the r_addend value in the R_X86_64_JUMP_SLOT relocation. + Such programs and shared libraries will fail at run-time randomly. + + Add GLIBC_ABI_DT_X86_64_PLT version to indicate that glibc is compatible + with DT_X86_64_PLT. + + The linker can add the glibc GLIBC_ABI_DT_X86_64_PLT version dependency + whenever -z mark-plt is passed to the linker. The resulting programs and + shared libraries will fail to load at run-time against libc.so without the + GLIBC_ABI_DT_X86_64_PLT version, instead of fail randomly. + + This fixes BZ #33212. + + Signed-off-by: H.J. Lu + Reviewed-by: Sam James + (cherry picked from commit 399384e0c8193e31aea014220ccfa24300ae5938) + +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index af978601657c2129..579bb33ada0e5f16 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -210,6 +210,15 @@ LDFLAGS-tst-plt-rewritemod2.so = -Wl,-z,now,-z,undefs + tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so + ++tests-special += $(objpfx)check-dt-x86-64-plt.out ++ ++$(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ ++ $(evaluate-test) ++generated += check-dt-x86-64-plt.out ++ + tests-special += $(objpfx)check-gnu2-tls.out + + $(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index a63c11bcb25adf48..0a759029e5a00cf1 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -10,6 +10,11 @@ libc { + # by scripts/versions.awk. + __placeholder_only_for_empty_version_map; + } ++ GLIBC_ABI_DT_X86_64_PLT { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { diff --git a/glibc-upstream-2.39-230.patch b/glibc-upstream-2.39-230.patch new file mode 100644 index 0000000..20ab345 --- /dev/null +++ b/glibc-upstream-2.39-230.patch @@ -0,0 +1,103 @@ +commit 9fa7cc6a0b388ed16cf8a8976de5f5882882d503 +Author: Adam Sampson +Date: Mon May 6 18:16:32 2024 +0100 + + ldconfig: Move endswithn into a new header file + + is_gdb_python_file is doing a similar test, so it can use this helper + function as well. + + Signed-off-by: Adam Sampson + Reviewed-by: Adhemerval Zanella + (cherry picked from commit ed2b8d3a866eb37e069f6a71bdf10421cd4c5e54) + +diff --git a/elf/endswith.h b/elf/endswith.h +new file mode 100644 +index 0000000000000000..c6430c48be0c1071 +--- /dev/null ++++ b/elf/endswith.h +@@ -0,0 +1,33 @@ ++/* Copyright (C) 2023-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published ++ by the Free Software Foundation; version 2 of the License, or ++ (at your option) any later version. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, see . */ ++ ++#ifndef _ENDSWITH_H ++#define _ENDSWITH_H ++ ++#include ++ ++/* Return true if the N bytes at NAME end with with the characters in ++ the string SUFFIX. (NAME[N + 1] does not have to be a null byte.) ++ Expected to be called with a string literal for SUFFIX. */ ++static inline bool ++endswithn (const char *name, size_t n, const char *suffix) ++{ ++ return (n >= strlen (suffix) ++ && memcmp (name + n - strlen (suffix), suffix, ++ strlen (suffix)) == 0); ++} ++ ++#endif /* _ENDSWITH_H */ +diff --git a/elf/ldconfig.c b/elf/ldconfig.c +index b64c54b53e1aa5bf..0f3ef707dd2f721d 100644 +--- a/elf/ldconfig.c ++++ b/elf/ldconfig.c +@@ -40,6 +40,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -661,17 +662,6 @@ struct dlib_entry + struct dlib_entry *next; + }; + +-/* Return true if the N bytes at NAME end with with the characters in +- the string SUFFIX. (NAME[N + 1] does not have to be a null byte.) +- Expected to be called with a string literal for SUFFIX. */ +-static inline bool +-endswithn (const char *name, size_t n, const char *suffix) +-{ +- return (n >= strlen (suffix) +- && memcmp (name + n - strlen (suffix), suffix, +- strlen (suffix)) == 0); +-} +- + /* Skip some temporary DSO files. These files may be partially written + and lead to ldconfig crashes when examined. */ + static bool +diff --git a/elf/readlib.c b/elf/readlib.c +index 4d67c7413649be30..32e8b8eb2298c9dd 100644 +--- a/elf/readlib.c ++++ b/elf/readlib.c +@@ -33,6 +33,7 @@ + #include + + #include ++#include + + #define Elf32_CLASS ELFCLASS32 + #define Elf64_CLASS ELFCLASS64 +@@ -48,7 +49,7 @@ static bool + is_gdb_python_file (const char *name) + { + size_t len = strlen (name); +- return len > 7 && strcmp (name + len - 7, "-gdb.py") == 0; ++ return endswithn (name, len, "-gdb.py"); + } + + /* Returns 0 if everything is ok, != 0 in case of error. */ diff --git a/glibc-upstream-2.39-231.patch b/glibc-upstream-2.39-231.patch new file mode 100644 index 0000000..328ae5c --- /dev/null +++ b/glibc-upstream-2.39-231.patch @@ -0,0 +1,242 @@ +commit 6917fde6f9623d0521a9f16c8f10c94ab0f2e4ba +Author: Florian Weimer +Date: Fri Oct 25 16:50:10 2024 +0200 + + elf: Run constructors on cyclic recursive dlopen (bug 31986) + + This is conceptually similar to the reported bug, but does not + depend on auditing. The fix is simple: just complete execution + of the constructors. This exposed the fact that the link map + for statically linked executables does not have l_init_called + set, even though constructors have run. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 9897ced8e78db5d813166a7ccccfd5a42c69ef20) + +diff --git a/elf/Makefile b/elf/Makefile +index 3085a0844c6604fe..7690ee9edc0b0c9a 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -414,6 +414,7 @@ tests += \ + tst-dlmopen1 \ + tst-dlmopen3 \ + tst-dlmopen4 \ ++ tst-dlopen-recurse \ + tst-dlopen-self \ + tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ +@@ -858,6 +859,8 @@ modules-names += \ + tst-dlmopen-twice-mod1 \ + tst-dlmopen-twice-mod2 \ + tst-dlmopen1mod \ ++ tst-dlopen-recursemod1 \ ++ tst-dlopen-recursemod2 \ + tst-dlopen-sgid-mod \ + tst-dlopen-tlsreinitmod1 \ + tst-dlopen-tlsreinitmod2 \ +@@ -3145,3 +3148,6 @@ $(objpfx)tst-dlopen-tlsreinit4.out: $(objpfx)tst-auditmod1.so + tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + + $(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so ++ ++$(objpfx)tst-dlopen-recurse.out: $(objpfx)tst-dlopen-recursemod1.so ++$(objpfx)tst-dlopen-recursemod1.so: $(objpfx)tst-dlopen-recursemod2.so +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 8556e7bd2fb0b40e..5139d276e04a5d85 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -601,6 +601,14 @@ dl_open_worker_begin (void *a) + = _dl_debug_update (args->nsid)->r_state; + assert (r_state == RT_CONSISTENT); + ++ /* Do not return without calling the (supposedly new) map's ++ constructor. This case occurs if a dependency of a directly ++ opened map has a constructor that calls dlopen again on the ++ initially opened map. The new map is initialized last, so ++ checking only it is enough. */ ++ if (!new->l_init_called) ++ _dl_catch_exception (NULL, call_dl_init, args); ++ + return; + } + +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 451932dd03e971b8..94e8197c632c11c8 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -99,6 +99,7 @@ static struct link_map _dl_main_map = + .l_used = 1, + .l_tls_offset = NO_TLS_OFFSET, + .l_serial = 1, ++ .l_init_called = 1, + }; + + /* Namespace information. */ +diff --git a/elf/tst-dlopen-recurse.c b/elf/tst-dlopen-recurse.c +new file mode 100644 +index 0000000000000000..c7fb379d373c6e77 +--- /dev/null ++++ b/elf/tst-dlopen-recurse.c +@@ -0,0 +1,34 @@ ++/* Test that recursive dlopen runs constructors before return (bug 31986). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ void *handle = xdlopen ("tst-dlopen-recursemod1.so", RTLD_NOW); ++ int *status = dlsym (handle, "recursemod1_status"); ++ printf ("info: recursemod1_status == %d (from main)\n", *status); ++ TEST_COMPARE (*status, 2); ++ xdlclose (handle); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-dlopen-recursemod1.c b/elf/tst-dlopen-recursemod1.c +new file mode 100644 +index 0000000000000000..5e0cc0eb8c32d6d4 +--- /dev/null ++++ b/elf/tst-dlopen-recursemod1.c +@@ -0,0 +1,50 @@ ++/* Directly opened test module that gets recursively opened again. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++int recursemod1_status; ++ ++/* Force linking against st-dlopen-recursemod2.so. Also allows ++ checking for relocation. */ ++extern int recursemod2_status; ++int *force_recursemod2_reference = &recursemod2_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ ++recursemod1_status; ++ printf ("info: tst-dlopen-recursemod1.so constructor called (status %d)\n", ++ recursemod1_status); ++} ++ ++static void __attribute__ ((destructor)) ++fini (void) ++{ ++ /* The recursemod1_status variable was incremented in the ++ tst-dlopen-recursemod2.so constructor. */ ++ printf ("info: tst-dlopen-recursemod1.so destructor called (status %d)\n", ++ recursemod1_status); ++ if (recursemod1_status != 2) ++ { ++ puts ("error: recursemod1_status == 2 expected"); ++ exit (1); ++ } ++} +diff --git a/elf/tst-dlopen-recursemod2.c b/elf/tst-dlopen-recursemod2.c +new file mode 100644 +index 0000000000000000..edd2f2526b877810 +--- /dev/null ++++ b/elf/tst-dlopen-recursemod2.c +@@ -0,0 +1,66 @@ ++/* Indirectly opened module that recursively opens the directly opened module. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++int recursemod2_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ ++recursemod2_status; ++ printf ("info: tst-dlopen-recursemod2.so constructor called (status %d)\n", ++ recursemod2_status); ++ void *handle = dlopen ("tst-dlopen-recursemod1.so", RTLD_NOW); ++ if (handle == NULL) ++ { ++ printf ("error: dlopen: %s\n", dlerror ()); ++ exit (1); ++ } ++ int *status = dlsym (handle, "recursemod1_status"); ++ if (status == NULL) ++ { ++ printf ("error: dlsym: %s\n", dlerror ()); ++ exit (1); ++ } ++ printf ("info: recursemod1_status == %d\n", *status); ++ if (*status != 1) ++ { ++ puts ("error: recursemod1_status == 1 expected"); ++ exit (1); ++ } ++ ++*status; ++ printf ("info: recursemod1_status == %d\n", *status); ++ ++ int **mod2_status = dlsym (handle, "force_recursemod2_reference"); ++ if (mod2_status == NULL || *mod2_status != &recursemod2_status) ++ { ++ puts ("error: invalid recursemod2_status address in" ++ " tst-dlopen-recursemod1.so"); ++ exit (1); ++ } ++} ++ ++static void __attribute__ ((destructor)) ++fini (void) ++{ ++ printf ("info: tst-dlopen-recursemod2.so destructor called (status %d)\n", ++ recursemod2_status); ++} diff --git a/glibc-upstream-2.39-232.patch b/glibc-upstream-2.39-232.patch new file mode 100644 index 0000000..2b9f507 --- /dev/null +++ b/glibc-upstream-2.39-232.patch @@ -0,0 +1,97 @@ +commit d6cc325fcf3d5a4ceeabfee465e6f90be1f72e8b +Author: Florian Weimer +Date: Fri Oct 25 16:50:10 2024 +0200 + + elf: Signal LA_ACT_CONSISTENT to auditors after RT_CONSISTENT switch + + Auditors can call into the dynamic loader again if + LA_ACT_CONSISTENT, and those recursive calls could observe + r_state != RT_CONSISTENT. + + We should consider failing dlopen/dlmopen/dlclose if + r_state != RT_CONSISTENT. The dynamic linker is probably not + in a state in which it can handle reentrant calls. This + needs further investigation. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit e096b7a1896886eb7dd2732ccbf1184b0eec9a63) + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 88226245eb4b7a81..b6f4daac792b8a90 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -723,6 +723,11 @@ _dl_close_worker (struct link_map *map, bool force) + /* TLS is cleaned up for the unloaded modules. */ + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + ++ /* Notify the debugger those objects are finalized and gone. */ ++ r->r_state = RT_CONSISTENT; ++ _dl_debug_state (); ++ LIBC_PROBE (unmap_complete, 2, nsid, r); ++ + #ifdef SHARED + /* Auditing checkpoint: we have deleted all objects. Also, do not notify + auditors of the cleanup of a failed audit module loading attempt. */ +@@ -735,11 +740,6 @@ _dl_close_worker (struct link_map *map, bool force) + --GL(dl_nns); + while (GL(dl_ns)[GL(dl_nns) - 1]._ns_loaded == NULL); + +- /* Notify the debugger those objects are finalized and gone. */ +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); +- LIBC_PROBE (unmap_complete, 2, nsid, r); +- + /* Recheck if we need to retry, release the lock. */ + out: + if (dl_close_state == rerun) +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 5139d276e04a5d85..5a30a57ee1487b31 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -639,17 +639,17 @@ dl_open_worker_begin (void *a) + #endif + } + +-#ifdef SHARED +- /* Auditing checkpoint: we have added all objects. */ +- _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); +-#endif +- + /* Notify the debugger all new objects are now ready to go. */ + struct r_debug *r = _dl_debug_update (args->nsid); + r->r_state = RT_CONSISTENT; + _dl_debug_state (); + LIBC_PROBE (map_complete, 3, args->nsid, r, new); + ++#ifdef SHARED ++ /* Auditing checkpoint: we have added all objects. */ ++ _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); ++#endif ++ + _dl_open_check (new); + + /* Print scope information. */ +diff --git a/elf/rtld.c b/elf/rtld.c +index 4760633866cf9159..b308f7c9577b4bb3 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2416,9 +2416,6 @@ dl_main (const ElfW(Phdr) *phdr, + _dl_relocate_object might need to call `mprotect' for DT_TEXTREL. */ + _dl_sysdep_start_cleanup (); + +- /* Auditing checkpoint: we have added all objects. */ +- _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); +- + /* Notify the debugger all new objects are now ready to go. We must re-get + the address since by now the variable might be in another object. */ + r = _dl_debug_update (LM_ID_BASE); +@@ -2426,6 +2423,9 @@ dl_main (const ElfW(Phdr) *phdr, + _dl_debug_state (); + LIBC_PROBE (init_complete, 2, LM_ID_BASE, r); + ++ /* Auditing checkpoint: we have added all objects. */ ++ _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT); ++ + #if defined USE_LDCONFIG && !defined MAP_COPY + /* We must munmap() the cache file. */ + _dl_unload_cache (); diff --git a/glibc-upstream-2.39-233.patch b/glibc-upstream-2.39-233.patch new file mode 100644 index 0000000..d3aee3f --- /dev/null +++ b/glibc-upstream-2.39-233.patch @@ -0,0 +1,337 @@ +commit 5f225025db0f5df9912893d4b399d9e640b84814 +Author: Florian Weimer +Date: Fri Oct 25 16:50:10 2024 +0200 + + elf: Signal RT_CONSISTENT after relocation processing in dlopen (bug 31986) + + Previously, a la_activity audit event was generated before + relocation processing completed. This does did not match what + happened during initial startup in elf/rtld.c (towards the end + of dl_main). It also caused various problems if an auditor + tried to open the same shared object again using dlmopen: + If it was the directly loaded object, it had a search scope + associated with it, so the early exit in dl_open_worker_begin + was taken even though the object was unrelocated. This caused + the r_state == RT_CONSISTENT assert to fail. Avoidance of the + assert also depends on reversing the order of r_state update + and auditor event (already implemented in a previous commit). + + At the later point, args->map can be NULL due to failure, + so use the assigned namespace ID instead if that is available. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 43db5e2c0672cae7edea7c9685b22317eae25471) + +diff --git a/elf/Makefile b/elf/Makefile +index 7690ee9edc0b0c9a..0f1125cb634d7184 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -414,6 +414,7 @@ tests += \ + tst-dlmopen1 \ + tst-dlmopen3 \ + tst-dlmopen4 \ ++ tst-dlopen-auditdup \ + tst-dlopen-recurse \ + tst-dlopen-self \ + tst-dlopen-tlsmodid \ +@@ -859,6 +860,8 @@ modules-names += \ + tst-dlmopen-twice-mod1 \ + tst-dlmopen-twice-mod2 \ + tst-dlmopen1mod \ ++ tst-dlopen-auditdup-auditmod \ ++ tst-dlopen-auditdupmod \ + tst-dlopen-recursemod1 \ + tst-dlopen-recursemod2 \ + tst-dlopen-sgid-mod \ +@@ -3151,3 +3154,6 @@ $(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so + + $(objpfx)tst-dlopen-recurse.out: $(objpfx)tst-dlopen-recursemod1.so + $(objpfx)tst-dlopen-recursemod1.so: $(objpfx)tst-dlopen-recursemod2.so ++tst-dlopen-auditdup-ENV = LD_AUDIT=$(objpfx)tst-dlopen-auditdup-auditmod.so ++$(objpfx)tst-dlopen-auditdup.out: \ ++ $(objpfx)tst-dlopen-auditdupmod.so $(objpfx)tst-dlopen-auditdup-auditmod.so +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 5a30a57ee1487b31..88e8ad8d3abcdd44 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -576,6 +576,14 @@ dl_open_worker_begin (void *a) + _dl_debug_printf ("opening file=%s [%lu]; direct_opencount=%u\n\n", + new->l_name, new->l_ns, new->l_direct_opencount); + ++#ifdef SHARED ++ /* No relocation processing on this execution path. But ++ relocation has not been performed for static ++ position-dependent executables, so disable the assert for ++ static linking. */ ++ assert (new->l_relocated); ++#endif ++ + /* If the user requested the object to be in the global + namespace but it is not so far, prepare to add it now. This + can raise an exception to do a malloc failure. */ +@@ -597,10 +605,6 @@ dl_open_worker_begin (void *a) + if ((mode & RTLD_GLOBAL) && new->l_global == 0) + add_to_global_update (new); + +- const int r_state __attribute__ ((unused)) +- = _dl_debug_update (args->nsid)->r_state; +- assert (r_state == RT_CONSISTENT); +- + /* Do not return without calling the (supposedly new) map's + constructor. This case occurs if a dependency of a directly + opened map has a constructor that calls dlopen again on the +@@ -639,17 +643,6 @@ dl_open_worker_begin (void *a) + #endif + } + +- /* Notify the debugger all new objects are now ready to go. */ +- struct r_debug *r = _dl_debug_update (args->nsid); +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); +- LIBC_PROBE (map_complete, 3, args->nsid, r, new); +- +-#ifdef SHARED +- /* Auditing checkpoint: we have added all objects. */ +- _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT); +-#endif +- + _dl_open_check (new); + + /* Print scope information. */ +@@ -696,6 +689,7 @@ dl_open_worker_begin (void *a) + created dlmopen namespaces. Do not do this for static dlopen + because libc has relocations against ld.so, which may not have + been relocated at this point. */ ++ struct r_debug *r = _dl_debug_update (args->nsid); + #ifdef SHARED + if (GL(dl_ns)[args->nsid].libc_map != NULL) + _dl_open_relocate_one_object (args, r, GL(dl_ns)[args->nsid].libc_map, +@@ -787,6 +781,26 @@ dl_open_worker (void *a) + + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + ++ /* Auditing checkpoint and debugger signalling. Do this even on ++ error, so that dlopen exists with consistent state. */ ++ if (args->nsid >= 0 || args->map != NULL) ++ { ++ Lmid_t nsid = args->map != NULL ? args->map->l_ns : args->nsid; ++ struct r_debug *r = _dl_debug_update (nsid); ++#ifdef SHARED ++ bool was_not_consistent = r->r_state != RT_CONSISTENT; ++#endif ++ r->r_state = RT_CONSISTENT; ++ _dl_debug_state (); ++ LIBC_PROBE (map_complete, 3, nsid, r, new); ++ ++#ifdef SHARED ++ if (was_not_consistent) ++ /* Avoid redudant/recursive signalling. */ ++ _dl_audit_activity_nsid (nsid, LA_ACT_CONSISTENT); ++#endif ++ } ++ + if (__glibc_unlikely (ex.errstring != NULL)) + /* Reraise the error. */ + _dl_signal_exception (err, &ex, NULL); +diff --git a/elf/tst-dlopen-auditdup-auditmod.c b/elf/tst-dlopen-auditdup-auditmod.c +new file mode 100644 +index 0000000000000000..9b67295e94d03e7a +--- /dev/null ++++ b/elf/tst-dlopen-auditdup-auditmod.c +@@ -0,0 +1,100 @@ ++/* Auditor that opens again an object that just has been opened. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int ++la_version (unsigned int v) ++{ ++ return LAV_CURRENT; ++} ++ ++static bool trigger_on_la_activity; ++ ++unsigned int ++la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) ++{ ++ printf ("info: la_objopen: \"%s\"\n", map->l_name); ++ if (strstr (map->l_name, "/tst-dlopen-auditdupmod.so") != NULL) ++ trigger_on_la_activity = true; ++ return 0; ++} ++ ++void ++la_activity (uintptr_t *cookie, unsigned int flag) ++{ ++ static unsigned int calls; ++ ++calls; ++ printf ("info: la_activity: call %u (flag %u)\n", calls, flag); ++ fflush (stdout); ++ if (trigger_on_la_activity) ++ { ++ /* Avoid triggering on the dlmopen call below. */ ++ static bool recursion; ++ if (recursion) ++ return; ++ recursion = true; ++ ++ puts ("info: about to dlmopen tst-dlopen-auditdupmod.so"); ++ fflush (stdout); ++ void *handle = dlmopen (LM_ID_BASE, "tst-dlopen-auditdupmod.so", ++ RTLD_NOW); ++ if (handle == NULL) ++ { ++ printf ("error: dlmopen: %s\n", dlerror ()); ++ fflush (stdout); ++ _exit (1); ++ } ++ ++ /* Check that the constructor has run. */ ++ int *status = dlsym (handle, "auditdupmod_status"); ++ if (status == NULL) ++ { ++ printf ("error: dlsym: %s\n", dlerror ()); ++ fflush (stdout); ++ _exit (1); ++ } ++ printf ("info: auditdupmod_status == %d\n", *status); ++ if (*status != 1) ++ { ++ puts ("error: auditdupmod_status == 1 expected"); ++ fflush (stdout); ++ _exit (1); ++ } ++ /* Checked in the destructor and the main program. */ ++ ++*status; ++ printf ("info: auditdupmod_status == %d\n", *status); ++ ++ /* Check that the module has been relocated. */ ++ int **status_address = dlsym (handle, "auditdupmod_status_address"); ++ if (status_address == NULL || *status_address != status) ++ { ++ puts ("error: invalid auditdupmod_status address in" ++ " tst-dlopen-auditdupmod.so"); ++ fflush (stdout); ++ _exit (1); ++ } ++ ++ fflush (stdout); ++ } ++} +diff --git a/elf/tst-dlopen-auditdup.c b/elf/tst-dlopen-auditdup.c +new file mode 100644 +index 0000000000000000..d022c58ae3091da1 +--- /dev/null ++++ b/elf/tst-dlopen-auditdup.c +@@ -0,0 +1,36 @@ ++/* Test that recursive dlopen from auditor works (bug 31986). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ puts ("info: about to dlopen tst-dlopen-auditdupmod.so"); ++ fflush (stdout); ++ void *handle = xdlopen ("tst-dlopen-auditdupmod.so", RTLD_NOW); ++ int *status = xdlsym (handle, "auditdupmod_status"); ++ printf ("info: auditdupmod_status == %d (from main)\n", *status); ++ TEST_COMPARE (*status, 2); ++ xdlclose (handle); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-dlopen-auditdupmod.c b/elf/tst-dlopen-auditdupmod.c +new file mode 100644 +index 0000000000000000..59b7e21daa8212df +--- /dev/null ++++ b/elf/tst-dlopen-auditdupmod.c +@@ -0,0 +1,48 @@ ++/* Directly opened test module that gets reopened from the auditor. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++int auditdupmod_status; ++ ++/* Used to check for successful relocation processing. */ ++int *auditdupmod_status_address = &auditdupmod_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ ++auditdupmod_status; ++ printf ("info: tst-dlopen-auditdupmod.so constructor called (status %d)\n", ++ auditdupmod_status); ++} ++ ++static void __attribute__ ((destructor)) ++fini (void) ++{ ++ /* The tst-dlopen-auditdup-auditmod.so auditor incremented ++ auditdupmod_status. */ ++ printf ("info: tst-dlopen-auditdupmod.so destructor called (status %d)\n", ++ auditdupmod_status); ++ if (auditdupmod_status != 2) ++ { ++ puts ("error: auditdupmod_status == 2 expected"); ++ exit (1); ++ } ++} diff --git a/glibc-upstream-2.39-234.patch b/glibc-upstream-2.39-234.patch new file mode 100644 index 0000000..fccdb50 --- /dev/null +++ b/glibc-upstream-2.39-234.patch @@ -0,0 +1,26 @@ +commit 46e3ecad27f65dd239d6d3568b81338f4525585f +Author: Florian Weimer +Date: Fri Oct 25 17:41:53 2024 +0200 + + elf: Fix map_complete Systemtap probe in dl_open_worker + + The refactoring did not take the change of variable into account. + Fixes commit 43db5e2c0672cae7edea7c9685b22317eae25471 + ("elf: Signal RT_CONSISTENT after relocation processing in dlopen + (bug 31986)"). + + (cherry picked from commit ac73067cb7a328bf106ecd041c020fc61be7e087) + +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 88e8ad8d3abcdd44..bd15f5f6a446115d 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -792,7 +792,7 @@ dl_open_worker (void *a) + #endif + r->r_state = RT_CONSISTENT; + _dl_debug_state (); +- LIBC_PROBE (map_complete, 3, nsid, r, new); ++ LIBC_PROBE (map_complete, 3, nsid, r, args->map); + + #ifdef SHARED + if (was_not_consistent) diff --git a/glibc-upstream-2.39-235.patch b/glibc-upstream-2.39-235.patch new file mode 100644 index 0000000..829d052 --- /dev/null +++ b/glibc-upstream-2.39-235.patch @@ -0,0 +1,271 @@ +commit 2b89de7c91e5e71732a32efef94075d9edbff95e +Author: Florian Weimer +Date: Mon Oct 28 14:45:30 2024 +0100 + + Revert "elf: Run constructors on cyclic recursive dlopen (bug 31986)" + + This reverts commit 9897ced8e78db5d813166a7ccccfd5a42c69ef20. + + Adjust the test expectations in elf/tst-dlopen-auditdup-auditmod.c + accordingly. + + (cherry picked from commit 95129e6b8fabdaa8cd8a4a5cc20be0f4cb0ba59f) + +diff --git a/elf/Makefile b/elf/Makefile +index 0f1125cb634d7184..479ef766c8f955f2 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -415,7 +415,6 @@ tests += \ + tst-dlmopen3 \ + tst-dlmopen4 \ + tst-dlopen-auditdup \ +- tst-dlopen-recurse \ + tst-dlopen-self \ + tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ +@@ -862,8 +861,6 @@ modules-names += \ + tst-dlmopen1mod \ + tst-dlopen-auditdup-auditmod \ + tst-dlopen-auditdupmod \ +- tst-dlopen-recursemod1 \ +- tst-dlopen-recursemod2 \ + tst-dlopen-sgid-mod \ + tst-dlopen-tlsreinitmod1 \ + tst-dlopen-tlsreinitmod2 \ +@@ -3152,8 +3149,6 @@ tst-dlopen-tlsreinit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + + $(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so + +-$(objpfx)tst-dlopen-recurse.out: $(objpfx)tst-dlopen-recursemod1.so +-$(objpfx)tst-dlopen-recursemod1.so: $(objpfx)tst-dlopen-recursemod2.so + tst-dlopen-auditdup-ENV = LD_AUDIT=$(objpfx)tst-dlopen-auditdup-auditmod.so + $(objpfx)tst-dlopen-auditdup.out: \ + $(objpfx)tst-dlopen-auditdupmod.so $(objpfx)tst-dlopen-auditdup-auditmod.so +diff --git a/elf/dl-open.c b/elf/dl-open.c +index bd15f5f6a446115d..b00f283c42d19adb 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -605,14 +605,6 @@ dl_open_worker_begin (void *a) + if ((mode & RTLD_GLOBAL) && new->l_global == 0) + add_to_global_update (new); + +- /* Do not return without calling the (supposedly new) map's +- constructor. This case occurs if a dependency of a directly +- opened map has a constructor that calls dlopen again on the +- initially opened map. The new map is initialized last, so +- checking only it is enough. */ +- if (!new->l_init_called) +- _dl_catch_exception (NULL, call_dl_init, args); +- + return; + } + +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 94e8197c632c11c8..451932dd03e971b8 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -99,7 +99,6 @@ static struct link_map _dl_main_map = + .l_used = 1, + .l_tls_offset = NO_TLS_OFFSET, + .l_serial = 1, +- .l_init_called = 1, + }; + + /* Namespace information. */ +diff --git a/elf/tst-dlopen-auditdup-auditmod.c b/elf/tst-dlopen-auditdup-auditmod.c +index 9b67295e94d03e7a..270a595ec4de1439 100644 +--- a/elf/tst-dlopen-auditdup-auditmod.c ++++ b/elf/tst-dlopen-auditdup-auditmod.c +@@ -66,7 +66,11 @@ la_activity (uintptr_t *cookie, unsigned int flag) + _exit (1); + } + +- /* Check that the constructor has run. */ ++ /* Check that the constructor has not run. Running the ++ constructor would require constructing its dependencies, but ++ the constructor call that triggered this auditing activity ++ has not completed, and constructors among the dependencies ++ may not be able to deal with that. */ + int *status = dlsym (handle, "auditdupmod_status"); + if (status == NULL) + { +@@ -75,9 +79,9 @@ la_activity (uintptr_t *cookie, unsigned int flag) + _exit (1); + } + printf ("info: auditdupmod_status == %d\n", *status); +- if (*status != 1) ++ if (*status != 0) + { +- puts ("error: auditdupmod_status == 1 expected"); ++ puts ("error: auditdupmod_status == 0 expected"); + fflush (stdout); + _exit (1); + } +diff --git a/elf/tst-dlopen-recurse.c b/elf/tst-dlopen-recurse.c +deleted file mode 100644 +index c7fb379d373c6e77..0000000000000000 +--- a/elf/tst-dlopen-recurse.c ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* Test that recursive dlopen runs constructors before return (bug 31986). +- Copyright (C) 2024 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +- +-static int +-do_test (void) +-{ +- void *handle = xdlopen ("tst-dlopen-recursemod1.so", RTLD_NOW); +- int *status = dlsym (handle, "recursemod1_status"); +- printf ("info: recursemod1_status == %d (from main)\n", *status); +- TEST_COMPARE (*status, 2); +- xdlclose (handle); +- return 0; +-} +- +-#include +diff --git a/elf/tst-dlopen-recursemod1.c b/elf/tst-dlopen-recursemod1.c +deleted file mode 100644 +index 5e0cc0eb8c32d6d4..0000000000000000 +--- a/elf/tst-dlopen-recursemod1.c ++++ /dev/null +@@ -1,50 +0,0 @@ +-/* Directly opened test module that gets recursively opened again. +- Copyright (C) 2024 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +- +-int recursemod1_status; +- +-/* Force linking against st-dlopen-recursemod2.so. Also allows +- checking for relocation. */ +-extern int recursemod2_status; +-int *force_recursemod2_reference = &recursemod2_status; +- +-static void __attribute__ ((constructor)) +-init (void) +-{ +- ++recursemod1_status; +- printf ("info: tst-dlopen-recursemod1.so constructor called (status %d)\n", +- recursemod1_status); +-} +- +-static void __attribute__ ((destructor)) +-fini (void) +-{ +- /* The recursemod1_status variable was incremented in the +- tst-dlopen-recursemod2.so constructor. */ +- printf ("info: tst-dlopen-recursemod1.so destructor called (status %d)\n", +- recursemod1_status); +- if (recursemod1_status != 2) +- { +- puts ("error: recursemod1_status == 2 expected"); +- exit (1); +- } +-} +diff --git a/elf/tst-dlopen-recursemod2.c b/elf/tst-dlopen-recursemod2.c +deleted file mode 100644 +index edd2f2526b877810..0000000000000000 +--- a/elf/tst-dlopen-recursemod2.c ++++ /dev/null +@@ -1,66 +0,0 @@ +-/* Indirectly opened module that recursively opens the directly opened module. +- Copyright (C) 2024 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +- +-int recursemod2_status; +- +-static void __attribute__ ((constructor)) +-init (void) +-{ +- ++recursemod2_status; +- printf ("info: tst-dlopen-recursemod2.so constructor called (status %d)\n", +- recursemod2_status); +- void *handle = dlopen ("tst-dlopen-recursemod1.so", RTLD_NOW); +- if (handle == NULL) +- { +- printf ("error: dlopen: %s\n", dlerror ()); +- exit (1); +- } +- int *status = dlsym (handle, "recursemod1_status"); +- if (status == NULL) +- { +- printf ("error: dlsym: %s\n", dlerror ()); +- exit (1); +- } +- printf ("info: recursemod1_status == %d\n", *status); +- if (*status != 1) +- { +- puts ("error: recursemod1_status == 1 expected"); +- exit (1); +- } +- ++*status; +- printf ("info: recursemod1_status == %d\n", *status); +- +- int **mod2_status = dlsym (handle, "force_recursemod2_reference"); +- if (mod2_status == NULL || *mod2_status != &recursemod2_status) +- { +- puts ("error: invalid recursemod2_status address in" +- " tst-dlopen-recursemod1.so"); +- exit (1); +- } +-} +- +-static void __attribute__ ((destructor)) +-fini (void) +-{ +- printf ("info: tst-dlopen-recursemod2.so destructor called (status %d)\n", +- recursemod2_status); +-} diff --git a/glibc-upstream-2.39-236.patch b/glibc-upstream-2.39-236.patch new file mode 100644 index 0000000..f6bc066 --- /dev/null +++ b/glibc-upstream-2.39-236.patch @@ -0,0 +1,147 @@ +commit b2d8c6cbe70bbafb2238f0595c36fbedf64d00c2 +Author: Florian Weimer +Date: Wed Nov 6 10:33:44 2024 +0100 + + elf: rtld_multiple_ref is always true + + For a long time, libc.so.6 has dependend on ld.so, which + means that there is a reference to ld.so in all processes, + and rtld_multiple_ref is always true. In fact, if + rtld_multiple_ref were false, some of the ld.so setup code + would not run. + + Reviewed-by: DJ Delorie + (cherry picked from commit 8f8dd904c4a2207699bb666f30acceb5209c8d3f) + +diff --git a/elf/rtld.c b/elf/rtld.c +index b308f7c9577b4bb3..41f8c329772b2b7a 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2010,43 +2010,37 @@ dl_main (const ElfW(Phdr) *phdr, + if (main_map->l_searchlist.r_list[i] == &GL(dl_rtld_map)) + break; + +- bool rtld_multiple_ref = false; +- if (__glibc_likely (i < main_map->l_searchlist.r_nlist)) +- { +- /* Some DT_NEEDED entry referred to the interpreter object itself, so +- put it back in the list of visible objects. We insert it into the +- chain in symbol search order because gdb uses the chain's order as +- its symbol search order. */ +- rtld_multiple_ref = true; ++ /* Insert the link map for the dynamic loader into the chain in ++ symbol search order because gdb uses the chain's order as its ++ symbol search order. */ + +- GL(dl_rtld_map).l_prev = main_map->l_searchlist.r_list[i - 1]; +- if (__glibc_likely (state.mode == rtld_mode_normal)) +- { +- GL(dl_rtld_map).l_next = (i + 1 < main_map->l_searchlist.r_nlist +- ? main_map->l_searchlist.r_list[i + 1] +- : NULL); ++ GL(dl_rtld_map).l_prev = main_map->l_searchlist.r_list[i - 1]; ++ if (__glibc_likely (state.mode == rtld_mode_normal)) ++ { ++ GL(dl_rtld_map).l_next = (i + 1 < main_map->l_searchlist.r_nlist ++ ? main_map->l_searchlist.r_list[i + 1] ++ : NULL); + #ifdef NEED_DL_SYSINFO_DSO +- if (GLRO(dl_sysinfo_map) != NULL +- && GL(dl_rtld_map).l_prev->l_next == GLRO(dl_sysinfo_map) +- && GL(dl_rtld_map).l_next != GLRO(dl_sysinfo_map)) +- GL(dl_rtld_map).l_prev = GLRO(dl_sysinfo_map); ++ if (GLRO(dl_sysinfo_map) != NULL ++ && GL(dl_rtld_map).l_prev->l_next == GLRO(dl_sysinfo_map) ++ && GL(dl_rtld_map).l_next != GLRO(dl_sysinfo_map)) ++ GL(dl_rtld_map).l_prev = GLRO(dl_sysinfo_map); + #endif +- } +- else +- /* In trace mode there might be an invisible object (which we +- could not find) after the previous one in the search list. +- In this case it doesn't matter much where we put the +- interpreter object, so we just initialize the list pointer so +- that the assertion below holds. */ +- GL(dl_rtld_map).l_next = GL(dl_rtld_map).l_prev->l_next; +- +- assert (GL(dl_rtld_map).l_prev->l_next == GL(dl_rtld_map).l_next); +- GL(dl_rtld_map).l_prev->l_next = &GL(dl_rtld_map); +- if (GL(dl_rtld_map).l_next != NULL) +- { +- assert (GL(dl_rtld_map).l_next->l_prev == GL(dl_rtld_map).l_prev); +- GL(dl_rtld_map).l_next->l_prev = &GL(dl_rtld_map); +- } ++ } ++ else ++ /* In trace mode there might be an invisible object (which we ++ could not find) after the previous one in the search list. ++ In this case it doesn't matter much where we put the ++ interpreter object, so we just initialize the list pointer so ++ that the assertion below holds. */ ++ GL(dl_rtld_map).l_next = GL(dl_rtld_map).l_prev->l_next; ++ ++ assert (GL(dl_rtld_map).l_prev->l_next == GL(dl_rtld_map).l_next); ++ GL(dl_rtld_map).l_prev->l_next = &GL(dl_rtld_map); ++ if (GL(dl_rtld_map).l_next != NULL) ++ { ++ assert (GL(dl_rtld_map).l_next->l_prev == GL(dl_rtld_map).l_prev); ++ GL(dl_rtld_map).l_next->l_prev = &GL(dl_rtld_map); + } + + /* Now let us see whether all libraries are available in the +@@ -2374,35 +2368,33 @@ dl_main (const ElfW(Phdr) *phdr, + /* Make sure no new search directories have been added. */ + assert (GLRO(dl_init_all_dirs) == GL(dl_all_dirs)); + +- if (rtld_multiple_ref) +- { +- /* There was an explicit ref to the dynamic linker as a shared lib. +- Re-relocate ourselves with user-controlled symbol definitions. ++ /* Re-relocate ourselves with user-controlled symbol definitions. + +- We must do this after TLS initialization in case after this +- re-relocation, we might call a user-supplied function +- (e.g. calloc from _dl_relocate_object) that uses TLS data. */ ++ We must do this after TLS initialization in case after this ++ re-relocation, we might call a user-supplied function ++ (e.g. calloc from _dl_relocate_object) that uses TLS data. */ + +- /* Set up the object lookup structures. */ +- _dl_find_object_init (); ++ /* Set up the object lookup structures. */ ++ _dl_find_object_init (); + +- /* The malloc implementation has been relocated, so resolving +- its symbols (and potentially calling IFUNC resolvers) is safe +- at this point. */ +- __rtld_malloc_init_real (main_map); ++ /* The malloc implementation has been relocated, so resolving ++ its symbols (and potentially calling IFUNC resolvers) is safe ++ at this point. */ ++ __rtld_malloc_init_real (main_map); + +- /* Likewise for the locking implementation. */ +- __rtld_mutex_init (); ++ /* Likewise for the locking implementation. */ ++ __rtld_mutex_init (); + +- RTLD_TIMING_VAR (start); +- rtld_timer_start (&start); ++ { ++ RTLD_TIMING_VAR (start); ++ rtld_timer_start (&start); + +- /* Mark the link map as not yet relocated again. */ +- GL(dl_rtld_map).l_relocated = 0; +- _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); ++ /* Mark the link map as not yet relocated again. */ ++ GL(dl_rtld_map).l_relocated = 0; ++ _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); + +- rtld_timer_accum (&relocate_time, start); +- } ++ rtld_timer_accum (&relocate_time, start); ++ } + + /* Relocation is complete. Perform early libc initialization. This + is the initial libc, even if audit modules have been loaded with diff --git a/glibc-upstream-2.39-237.patch b/glibc-upstream-2.39-237.patch new file mode 100644 index 0000000..89b9b06 --- /dev/null +++ b/glibc-upstream-2.39-237.patch @@ -0,0 +1,53 @@ +commit 5434cc2c4152dddcfb2a6f6cb39b6ff33099a193 +Author: Florian Weimer +Date: Wed Nov 6 10:33:44 2024 +0100 + + elf: Do not define consider_profiling, consider_symbind as macros + + This avoids surprises when refactoring the code if these identifiers + are re-used later in the file. + + Reviewed-by: DJ Delorie + (cherry picked from commit a79642204537dec8a1e1c58d1e0a074b3c624f46) + +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index 4bf7aec88b844bc7..b2c1627ceb847486 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -220,8 +220,8 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + int lazy = reloc_mode & RTLD_LAZY; + int skip_ifunc = reloc_mode & __RTLD_NOIFUNC; + +-#ifdef SHARED + bool consider_symbind = false; ++#ifdef SHARED + /* If we are auditing, install the same handlers we need for profiling. */ + if ((reloc_mode & __RTLD_AUDIT) == 0) + { +@@ -240,9 +240,7 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + } + #elif defined PROF + /* Never use dynamic linker profiling for gprof profiling code. */ +-# define consider_profiling 0 +-#else +-# define consider_symbind 0 ++ consider_profiling = 0; + #endif + + /* If DT_BIND_NOW is set relocate all references in this object. We +@@ -300,7 +298,6 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + + ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc); + +-#ifndef PROF + if ((consider_profiling || consider_symbind) + && l->l_info[DT_PLTRELSZ] != NULL) + { +@@ -321,7 +318,6 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + _dl_fatal_printf (errstring, RTLD_PROGNAME, l->l_name); + } + } +-#endif + } + + /* Mark the object so we know this work has been done. */ diff --git a/glibc-upstream-2.39-238.patch b/glibc-upstream-2.39-238.patch new file mode 100644 index 0000000..7045d70 --- /dev/null +++ b/glibc-upstream-2.39-238.patch @@ -0,0 +1,78 @@ +commit 65d86471cee80144aee1829b191cd23dd9683497 +Author: Florian Weimer +Date: Wed Nov 6 10:33:44 2024 +0100 + + elf: Introduce _dl_relocate_object_no_relro + + And make _dl_protect_relro apply RELRO conditionally. + + Reviewed-by: DJ Delorie + (cherry picked from commit f2326c2ec0a0a8db7bc7f4db8cce3002768fc3b6) + +diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c +index b2c1627ceb847486..76d14830ddda83b7 100644 +--- a/elf/dl-reloc.c ++++ b/elf/dl-reloc.c +@@ -202,12 +202,9 @@ resolve_map (lookup_t l, struct r_scope_elem *scope[], const ElfW(Sym) **ref, + #include "dynamic-link.h" + + void +-_dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], +- int reloc_mode, int consider_profiling) ++_dl_relocate_object_no_relro (struct link_map *l, struct r_scope_elem *scope[], ++ int reloc_mode, int consider_profiling) + { +- if (l->l_relocated) +- return; +- + struct textrels + { + caddr_t start; +@@ -338,17 +335,24 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], + + textrels = textrels->next; + } +- +- /* In case we can protect the data now that the relocations are +- done, do it. */ +- if (l->l_relro_size != 0) +- _dl_protect_relro (l); + } + ++void ++_dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], ++ int reloc_mode, int consider_profiling) ++{ ++ if (l->l_relocated) ++ return; ++ _dl_relocate_object_no_relro (l, scope, reloc_mode, consider_profiling); ++ _dl_protect_relro (l); ++} + + void + _dl_protect_relro (struct link_map *l) + { ++ if (l->l_relro_size == 0) ++ return; ++ + ElfW(Addr) start = ALIGN_DOWN((l->l_addr + + l->l_relro_addr), + GLRO(dl_pagesize)); +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 259ce2e7d6e8ff31..91447a5e77c2466d 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1014,6 +1014,13 @@ extern void _dl_relocate_object (struct link_map *map, + int reloc_mode, int consider_profiling) + attribute_hidden; + ++/* Perform relocation, but do not apply RELRO. Does not check ++ L->relocated. Otherwise the same as _dl_relocate_object. */ ++void _dl_relocate_object_no_relro (struct link_map *map, ++ struct r_scope_elem *scope[], ++ int reloc_mode, int consider_profiling) ++ attribute_hidden; ++ + /* Protect PT_GNU_RELRO area. */ + extern void _dl_protect_relro (struct link_map *map) attribute_hidden; + diff --git a/glibc-upstream-2.39-239.patch b/glibc-upstream-2.39-239.patch new file mode 100644 index 0000000..03526bf --- /dev/null +++ b/glibc-upstream-2.39-239.patch @@ -0,0 +1,204 @@ +commit 4f145bb35d5aed0cb102ba2a7b05aec1cf980672 +Author: Florian Weimer +Date: Wed Nov 6 10:33:44 2024 +0100 + + elf: Switch to main malloc after final ld.so self-relocation + + Before commit ee1ada1bdb8074de6e1bdc956ab19aef7b6a7872 + ("elf: Rework exception handling in the dynamic loader + [BZ #25486]"), the previous order called the main calloc + to allocate a shadow GOT/PLT array for auditing support. + This happened before libc.so.6 ELF constructors were run, so + a user malloc could run without libc.so.6 having been + initialized fully. One observable effect was that + environ was NULL at this point. + + It does not seem to be possible at present to trigger such + an allocation, but it seems more robust to delay switching + to main malloc after ld.so self-relocation is complete. + The elf/tst-rtld-no-malloc-audit test case fails with a + 2.34-era glibc that does not have this fix. + + Reviewed-by: DJ Delorie + (cherry picked from commit c1560f3f75c0e892b5522c16f91b4e303f677094) + +diff --git a/elf/Makefile b/elf/Makefile +index 479ef766c8f955f2..91fd05c9c0e6084a 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -453,6 +453,9 @@ tests += \ + tst-recursive-tls \ + tst-relsort1 \ + tst-ro-dynamic \ ++ tst-rtld-no-malloc \ ++ tst-rtld-no-malloc-audit \ ++ tst-rtld-no-malloc-preload \ + tst-rtld-run-static \ + tst-single_threaded \ + tst-single_threaded-pthread \ +@@ -3152,3 +3155,9 @@ $(objpfx)tst-dlopen-sgid.out: $(objpfx)tst-dlopen-sgid-mod.so + tst-dlopen-auditdup-ENV = LD_AUDIT=$(objpfx)tst-dlopen-auditdup-auditmod.so + $(objpfx)tst-dlopen-auditdup.out: \ + $(objpfx)tst-dlopen-auditdupmod.so $(objpfx)tst-dlopen-auditdup-auditmod.so ++ ++# Reuse an audit module which provides ample debug logging. ++tst-rtld-no-malloc-audit-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so ++ ++# Any shared object should do. ++tst-rtld-no-malloc-preload-ENV = LD_PRELOAD=$(objpfx)tst-auditmod1.so +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 451932dd03e971b8..ee590edf93824d9b 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -338,8 +338,7 @@ _dl_non_dynamic_init (void) + call_function_static_weak (_dl_find_object_init); + + /* Setup relro on the binary itself. */ +- if (_dl_main_map.l_relro_size != 0) +- _dl_protect_relro (&_dl_main_map); ++ _dl_protect_relro (&_dl_main_map); + } + + #ifdef DL_SYSINFO_IMPLEMENTATION +diff --git a/elf/rtld.c b/elf/rtld.c +index 41f8c329772b2b7a..ff938186738e8a87 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2368,30 +2368,27 @@ dl_main (const ElfW(Phdr) *phdr, + /* Make sure no new search directories have been added. */ + assert (GLRO(dl_init_all_dirs) == GL(dl_all_dirs)); + +- /* Re-relocate ourselves with user-controlled symbol definitions. +- +- We must do this after TLS initialization in case after this +- re-relocation, we might call a user-supplied function +- (e.g. calloc from _dl_relocate_object) that uses TLS data. */ +- + /* Set up the object lookup structures. */ + _dl_find_object_init (); + +- /* The malloc implementation has been relocated, so resolving +- its symbols (and potentially calling IFUNC resolvers) is safe +- at this point. */ +- __rtld_malloc_init_real (main_map); +- + /* Likewise for the locking implementation. */ + __rtld_mutex_init (); + ++ /* Re-relocate ourselves with user-controlled symbol definitions. */ ++ + { + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); + +- /* Mark the link map as not yet relocated again. */ +- GL(dl_rtld_map).l_relocated = 0; +- _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0); ++ _dl_relocate_object_no_relro (&GL(dl_rtld_map), main_map->l_scope, 0, 0); ++ ++ /* The malloc implementation has been relocated, so resolving ++ its symbols (and potentially calling IFUNC resolvers) is safe ++ at this point. */ ++ __rtld_malloc_init_real (main_map); ++ ++ if (GL(dl_rtld_map).l_relro_size != 0) ++ _dl_protect_relro (&GL(dl_rtld_map)); + + rtld_timer_accum (&relocate_time, start); + } +diff --git a/elf/tst-rtld-no-malloc-audit.c b/elf/tst-rtld-no-malloc-audit.c +new file mode 100644 +index 0000000000000000..a028377ad1fea027 +--- /dev/null ++++ b/elf/tst-rtld-no-malloc-audit.c +@@ -0,0 +1 @@ ++#include "tst-rtld-no-malloc.c" +diff --git a/elf/tst-rtld-no-malloc-preload.c b/elf/tst-rtld-no-malloc-preload.c +new file mode 100644 +index 0000000000000000..a028377ad1fea027 +--- /dev/null ++++ b/elf/tst-rtld-no-malloc-preload.c +@@ -0,0 +1 @@ ++#include "tst-rtld-no-malloc.c" +diff --git a/elf/tst-rtld-no-malloc.c b/elf/tst-rtld-no-malloc.c +new file mode 100644 +index 0000000000000000..5f24d4bd72c4af0c +--- /dev/null ++++ b/elf/tst-rtld-no-malloc.c +@@ -0,0 +1,76 @@ ++/* Test that program loading does not call malloc. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ ++#include ++#include ++ ++static void ++print (const char *s) ++{ ++ const char *end = s + strlen (s); ++ while (s < end) ++ { ++ ssize_t ret = write (STDOUT_FILENO, s, end - s); ++ if (ret <= 0) ++ _exit (2); ++ s += ret; ++ } ++} ++ ++static void __attribute__ ((noreturn)) ++unexpected_call (const char *function) ++{ ++ print ("error: unexpected call to "); ++ print (function); ++ print ("\n"); ++ _exit (1); ++} ++ ++/* These are the malloc functions implement in elf/dl-minimal.c. */ ++ ++void ++free (void *ignored) ++{ ++ unexpected_call ("free"); ++} ++ ++void * ++calloc (size_t ignored1, size_t ignored2) ++{ ++ unexpected_call ("calloc"); ++} ++ ++void * ++malloc (size_t ignored) ++{ ++ unexpected_call ("malloc"); ++} ++ ++void * ++realloc (void *ignored1, size_t ignored2) ++{ ++ unexpected_call ("realloc"); ++} ++ ++int ++main (void) ++{ ++ /* Do not use the test wrapper, to avoid spurious malloc calls from it. */ ++ return 0; ++} diff --git a/glibc-upstream-2.39-240.patch b/glibc-upstream-2.39-240.patch new file mode 100644 index 0000000..aea3c22 --- /dev/null +++ b/glibc-upstream-2.39-240.patch @@ -0,0 +1,77 @@ +commit d21a217fa0199fed74f975b498408abb7606f6fe +Author: Florian Weimer +Date: Tue Sep 3 17:52:47 2024 +0200 + + elf: Update DSO list, write audit log to elf/tst-audit23.out + + After commit 1d5024f4f052c12e404d42d3b5bfe9c3e9fd27c4 + ("support: Build with exceptions and asynchronous unwind tables + [BZ #30587]"), libgcc_s is expected to show up in the DSO + list on 32-bit Arm. Do not update max_objs because vdso is not + tracked (and which is the reason why the test currently passes + even with libgcc_s present). + + Also write the log output from the auditor to standard output, + for easier test debugging. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 4a50fdf8b2c1106b50cd9056b4c6f3a72cdeed5f) + +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +index 32e7c8b2a3129c51..dada6bb1f8dabab5 100644 +--- a/elf/tst-audit23.c ++++ b/elf/tst-audit23.c +@@ -85,13 +85,28 @@ do_test (int argc, char *argv[]) + = support_capture_subprogram (spargv[0], spargv); + support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr); + ++ { ++ FILE *fp = fmemopen (result.err.buffer, result.err.length, "r"); ++ TEST_VERIFY (fp != NULL); ++ unsigned int line = 0; ++ char *buffer = NULL; ++ size_t buffer_length = 0; ++ puts ("info: *** audit log start ***"); ++ while (xgetline (&buffer, &buffer_length, fp)) ++ printf ("%6u\t%s", ++line, buffer); ++ puts ("info: *** audit log end ***"); ++ free (buffer); ++ xfclose (fp); ++ } ++ + /* The expected la_objopen/la_objclose: + 1. executable + 2. loader + 3. libc.so +- 4. tst-audit23mod.so +- 5. libc.so (LM_ID_NEWLM). +- 6. vdso (optional and ignored). */ ++ 4. libgcc_s.so (one some architectures, for libsupport) ++ 5. tst-audit23mod.so ++ 6. libc.so (LM_ID_NEWLM). ++ vdso (optional and ignored). */ + enum { max_objs = 6 }; + struct la_obj_t + { +@@ -115,8 +130,10 @@ do_test (int argc, char *argv[]) + TEST_VERIFY (out != NULL); + char *buffer = NULL; + size_t buffer_length = 0; ++ unsigned int line = 0; + while (xgetline (&buffer, &buffer_length, out)) + { ++ ++line; + if (startswith (buffer, "la_activity: ")) + { + uintptr_t cookie; +@@ -174,8 +191,8 @@ do_test (int argc, char *argv[]) + if (is_vdso (lname)) + continue; + if (nobjs == max_objs) +- FAIL_EXIT1 ("non expected la_objopen: %s %"PRIxPTR" %ld", +- lname, laddr, lmid); ++ FAIL_EXIT1 ("(line %u) non expected la_objopen: %s %"PRIxPTR" %ld", ++ line, lname, laddr, lmid); + objs[nobjs].lname = lname; + objs[nobjs].laddr = laddr; + objs[nobjs].lmid = lmid; diff --git a/glibc-upstream-2.39-241.patch b/glibc-upstream-2.39-241.patch new file mode 100644 index 0000000..ecdd6c9 --- /dev/null +++ b/glibc-upstream-2.39-241.patch @@ -0,0 +1,35 @@ +commit fef226255dda886d74dc72c5e43dbdde231cc74e +Author: Florian Weimer +Date: Fri Nov 29 15:36:40 2024 +0100 + + elf: Add the endswith function to + + And include for a definition of bool. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit a20bc2f6233a726c7df8eaa332b6e498bd59321f) + +diff --git a/elf/endswith.h b/elf/endswith.h +index c6430c48be0c1071..3954e57f8eff0faa 100644 +--- a/elf/endswith.h ++++ b/elf/endswith.h +@@ -17,6 +17,7 @@ + #ifndef _ENDSWITH_H + #define _ENDSWITH_H + ++#include + #include + + /* Return true if the N bytes at NAME end with with the characters in +@@ -30,4 +31,11 @@ endswithn (const char *name, size_t n, const char *suffix) + strlen (suffix)) == 0); + } + ++/* Same as endswithn, but uses the entire SUBJECT for matching. */ ++static inline bool ++endswith (const char *subject, const char *suffix) ++{ ++ return endswithn (subject, strlen (subject), suffix); ++} ++ + #endif /* _ENDSWITH_H */ diff --git a/glibc-upstream-2.39-242.patch b/glibc-upstream-2.39-242.patch new file mode 100644 index 0000000..e735d12 --- /dev/null +++ b/glibc-upstream-2.39-242.patch @@ -0,0 +1,132 @@ +commit e27601b385fba1f3598168136021764166b819cf +Author: Florian Weimer +Date: Fri Aug 9 15:31:18 2024 +0200 + + elf: Signal la_objopen for the proxy link map in dlmopen (bug 31985) + + Previously, the ld.so link map was silently added to the namespace. + This change produces an auditing event for it. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 8f36b1469677afe37168f9af1b77402d7a70c673) + +diff --git a/elf/dl-load.c b/elf/dl-load.c +index ce8fdea3024359b0..75a7187c649e0202 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -929,6 +929,37 @@ _dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph) + } + } + ++static void ++_dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l) ++{ ++ /* Signal that we are going to add new objects. */ ++ struct r_debug *r = _dl_debug_update (nsid); ++ if (r->r_state == RT_CONSISTENT) ++ { ++#ifdef SHARED ++ /* Auditing checkpoint: we are going to add new objects. Since this ++ is called after _dl_add_to_namespace_list the namespace is guaranteed ++ to not be empty. */ ++ if ((mode & __RTLD_AUDIT) == 0) ++ _dl_audit_activity_nsid (nsid, LA_ACT_ADD); ++#endif ++ ++ /* Notify the debugger we have added some objects. We need to ++ call _dl_debug_initialize in a static program in case dynamic ++ linking has not been used before. */ ++ r->r_state = RT_ADD; ++ _dl_debug_state (); ++ LIBC_PROBE (map_start, 2, nsid, r); ++ } ++ else ++ assert (r->r_state == RT_ADD); ++ ++#ifdef SHARED ++ /* Auditing checkpoint: we have a new object. */ ++ if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) ++ _dl_audit_objopen (l, nsid); ++#endif ++} + + /* Map in the shared object NAME, actually located in REALNAME, and already + opened on FD. */ +@@ -1029,6 +1060,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd, + /* Add the map for the mirrored object to the object list. */ + _dl_add_to_namespace_list (l, nsid); + ++ _dl_notify_new_object (mode, nsid, l); ++ + return l; + } + #endif +@@ -1487,33 +1520,7 @@ cannot enable executable stack as shared object requires"); + if (mode & __RTLD_SPROF) + return l; + +- /* Signal that we are going to add new objects. */ +- struct r_debug *r = _dl_debug_update (nsid); +- if (r->r_state == RT_CONSISTENT) +- { +-#ifdef SHARED +- /* Auditing checkpoint: we are going to add new objects. Since this +- is called after _dl_add_to_namespace_list the namespace is guaranteed +- to not be empty. */ +- if ((mode & __RTLD_AUDIT) == 0) +- _dl_audit_activity_nsid (nsid, LA_ACT_ADD); +-#endif +- +- /* Notify the debugger we have added some objects. We need to +- call _dl_debug_initialize in a static program in case dynamic +- linking has not been used before. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); +- LIBC_PROBE (map_start, 2, nsid, r); +- } +- else +- assert (r->r_state == RT_ADD); +- +-#ifdef SHARED +- /* Auditing checkpoint: we have a new object. */ +- if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing) +- _dl_audit_objopen (l, nsid); +-#endif ++ _dl_notify_new_object (mode, nsid, l); + + return l; + } +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +index dada6bb1f8dabab5..32759f956a4b3c58 100644 +--- a/elf/tst-audit23.c ++++ b/elf/tst-audit23.c +@@ -17,6 +17,7 @@ + . */ + + #include ++#include + #include + #include + #include +@@ -106,8 +107,9 @@ do_test (int argc, char *argv[]) + 4. libgcc_s.so (one some architectures, for libsupport) + 5. tst-audit23mod.so + 6. libc.so (LM_ID_NEWLM). ++ 7. loader (proxy link map in new namespace) + vdso (optional and ignored). */ +- enum { max_objs = 6 }; ++ enum { max_objs = 7 }; + struct la_obj_t + { + char *lname; +@@ -236,7 +238,9 @@ do_test (int argc, char *argv[]) + + for (size_t i = 0; i < nobjs; i++) + { +- TEST_COMPARE (objs[i].closed, true); ++ /* This subtest currently does not pass because of bug 32065. */ ++ if (! (endswith (objs[i].lname, LD_SO) && objs[i].lmid != LM_ID_BASE)) ++ TEST_COMPARE (objs[i].closed, true); + free (objs[i].lname); + } + diff --git a/glibc-upstream-2.39-243.patch b/glibc-upstream-2.39-243.patch new file mode 100644 index 0000000..277509b --- /dev/null +++ b/glibc-upstream-2.39-243.patch @@ -0,0 +1,77 @@ +commit f407a14ff7788cb6158d91d2cd9850f218e796d3 +Author: Florian Weimer +Date: Fri Aug 9 16:06:40 2024 +0200 + + elf: Call la_objclose for proxy link maps in _dl_fini (bug 32065) + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit c4b160744cb39eca20dc36b39c7fa6e10352706c) + +diff --git a/elf/dl-fini.c b/elf/dl-fini.c +index db996270de65e86a..a1a4c25829471510 100644 +--- a/elf/dl-fini.c ++++ b/elf/dl-fini.c +@@ -69,6 +69,7 @@ _dl_fini (void) + + unsigned int i; + struct link_map *l; ++ struct link_map *proxy_link_map = NULL; + assert (nloaded != 0 || GL(dl_ns)[ns]._ns_loaded == NULL); + for (l = GL(dl_ns)[ns]._ns_loaded, i = 0; l != NULL; l = l->l_next) + /* Do not handle ld.so in secondary namespaces. */ +@@ -84,6 +85,11 @@ _dl_fini (void) + are not dlclose()ed from underneath us. */ + ++l->l_direct_opencount; + } ++ else ++ /* Used below to call la_objclose for the ld.so proxy ++ link map. */ ++ proxy_link_map = l; ++ + assert (ns != LM_ID_BASE || i == nloaded); + assert (ns == LM_ID_BASE || i == nloaded || i == nloaded - 1); + unsigned int nmaps = i; +@@ -122,6 +128,9 @@ _dl_fini (void) + --l->l_direct_opencount; + } + ++ if (proxy_link_map != NULL) ++ _dl_audit_objclose (proxy_link_map); ++ + #ifdef SHARED + _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT); + #endif +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +index 32759f956a4b3c58..78b4ee384cfb0fe9 100644 +--- a/elf/tst-audit23.c ++++ b/elf/tst-audit23.c +@@ -236,13 +236,26 @@ do_test (int argc, char *argv[]) + } + } + ++ Lmid_t lmid_other = LM_ID_NEWLM; ++ unsigned int other_namespace_count = 0; + for (size_t i = 0; i < nobjs; i++) + { +- /* This subtest currently does not pass because of bug 32065. */ +- if (! (endswith (objs[i].lname, LD_SO) && objs[i].lmid != LM_ID_BASE)) +- TEST_COMPARE (objs[i].closed, true); ++ if (objs[i].lmid != LM_ID_BASE) ++ { ++ if (lmid_other == LM_ID_NEWLM) ++ lmid_other = objs[i].lmid; ++ TEST_COMPARE (objs[i].lmid, lmid_other); ++ ++other_namespace_count; ++ if (!(endswith (objs[i].lname, "/" LIBC_SO) ++ || endswith (objs[i].lname, "/" LD_SO))) ++ FAIL ("unexpected object in secondary namespace: %s", ++ objs[i].lname); ++ } ++ TEST_COMPARE (objs[i].closed, true); + free (objs[i].lname); + } ++ /* Both libc.so and ld.so should be present. */ ++ TEST_COMPARE (other_namespace_count, 2); + + /* la_activity(LA_ACT_CONSISTENT) should be the last callback received. + Since only one link map may be not-CONSISTENT at a time, this also diff --git a/glibc-upstream-2.39-244.patch b/glibc-upstream-2.39-244.patch new file mode 100644 index 0000000..8bae168 --- /dev/null +++ b/glibc-upstream-2.39-244.patch @@ -0,0 +1,141 @@ +commit 4c9b1877fde6535efb8bd3ba1888d1ef82c9a663 +Author: Florian Weimer +Date: Tue Sep 3 17:57:46 2024 +0200 + + elf: Reorder audit events in dlcose to match _dl_fini (bug 32066) + + This was discovered after extending elf/tst-audit23 to cover + dlclose of the dlmopen namespace. + + Auditors already experience the new order during process + shutdown (_dl_fini), so no LAV_CURRENT bump or backwards + compatibility code seems necessary. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 495b96e064da605630a23092d1e484ade4bdc093) + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index b6f4daac792b8a90..4c963097f4dc8d79 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -264,6 +264,12 @@ _dl_close_worker (struct link_map *map, bool force) + _dl_catch_exception (NULL, _dl_call_fini, imap); + + #ifdef SHARED ++ /* Auditing checkpoint: we will start deleting objects. ++ This is supposed to happen before la_objclose (see _dl_fini), ++ but only once per non-recursive dlclose call. */ ++ if (!unload_any) ++ _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); ++ + /* Auditing checkpoint: we remove an object. */ + _dl_audit_objclose (imap); + #endif +@@ -424,12 +430,8 @@ _dl_close_worker (struct link_map *map, bool force) + if (!unload_any) + goto out; + +-#ifdef SHARED +- /* Auditing checkpoint: we will start deleting objects. */ +- _dl_audit_activity_nsid (nsid, LA_ACT_DELETE); +-#endif +- +- /* Notify the debugger we are about to remove some loaded objects. */ ++ /* Notify the debugger we are about to remove some loaded objects. ++ LA_ACT_DELETE has already been signalled above for !unload_any. */ + struct r_debug *r = _dl_debug_update (nsid); + r->r_state = RT_DELETE; + _dl_debug_state (); +diff --git a/elf/tst-audit23.c b/elf/tst-audit23.c +index 78b4ee384cfb0fe9..1b76336595fcd301 100644 +--- a/elf/tst-audit23.c ++++ b/elf/tst-audit23.c +@@ -31,16 +31,21 @@ + #include + #include + #include ++#include + + static int restart; ++static int do_dlclose; + #define CMDLINE_OPTIONS \ +- { "restart", no_argument, &restart, 1 }, ++ { "restart", no_argument, &restart, 1 }, \ ++ { "dlclose", no_argument, &do_dlclose, 1 }, \ + + static int + handle_restart (void) + { + xdlopen ("tst-audit23mod.so", RTLD_NOW); +- xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ void *handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW); ++ if (do_dlclose) ++ xdlclose (handle); + + return 0; + } +@@ -60,8 +65,8 @@ is_vdso (const char *str) + || startswith (str, "linux-vdso"); + } + +-static int +-do_test (int argc, char *argv[]) ++static void ++do_one_test (int argc, char *argv[], bool pass_dlclose_flag) + { + /* We must have either: + - One or four parameters left if called initially: +@@ -69,16 +74,15 @@ do_test (int argc, char *argv[]) + + "--library-path" optional + + the library path optional + + the application name */ +- if (restart) +- return handle_restart (); +- +- char *spargv[9]; ++ char *spargv[10]; + TEST_VERIFY_EXIT (((argc - 1) + 3) < array_length (spargv)); + int i = 0; + for (; i < argc - 1; i++) + spargv[i] = argv[i + 1]; + spargv[i++] = (char *) "--direct"; + spargv[i++] = (char *) "--restart"; ++ if (pass_dlclose_flag) ++ spargv[i++] = (char *) "--dlclose"; + spargv[i] = NULL; + + setenv ("LD_AUDIT", "tst-auditmod23.so", 0); +@@ -146,8 +150,14 @@ do_test (int argc, char *argv[]) + + /* The cookie identifies the object at the head of the link map, + so we only add a new namespace if it changes from the previous +- one. This works since dlmopen is the last in the test body. */ +- if (cookie != last_act_cookie && last_act_cookie != -1) ++ one. This works since dlmopen is the last in the test body. ++ ++ Currently, this does not work as expected because there ++ is no head link map if a namespace is completely deleted. ++ No LA_ACT_CONSISTENT event is generated in that case. ++ See the comment in _dl_audit_activity_nsid and bug 32068. */ ++ if (cookie != last_act_cookie && last_act_cookie != -1 ++ && !pass_dlclose_flag) + TEST_COMPARE (last_act, LA_ACT_CONSISTENT); + + if (this_act == LA_ACT_ADD && acts[nacts] != cookie) +@@ -265,7 +275,16 @@ do_test (int argc, char *argv[]) + + free (buffer); + xfclose (out); ++} ++ ++static int ++do_test (int argc, char *argv[]) ++{ ++ if (restart) ++ return handle_restart (); + ++ do_one_test (argc, argv, false); ++ do_one_test (argc, argv, true); + return 0; + } + diff --git a/glibc-upstream-2.39-245.patch b/glibc-upstream-2.39-245.patch new file mode 100644 index 0000000..79fca89 --- /dev/null +++ b/glibc-upstream-2.39-245.patch @@ -0,0 +1,223 @@ +commit 5f5c411132676d4c5eb171354c51b62baea27493 +Author: Florian Weimer +Date: Tue Jan 7 09:18:07 2025 +0100 + + elf: Second ld.so relocation only if libc.so has been loaded + + Commit 8f8dd904c4a2207699bb666f30acceb5209c8d3f (“elf: + rtld_multiple_ref is always true”) removed some code that happened + to enable compatibility with programs that do not link against + libc.so. Such programs cannot call dlopen or any dynamic linker + functions (except __tls_get_addr), so this is not really useful. + Still ld.so should not crash with a null-pointer dereference + or undefined symbol reference in these cases. + + In the main relocation loop, call _dl_relocate_object unconditionally + because it already checks if the object has been relocated. + + If libc.so was loaded, self-relocate ld.so against it and call + __rtld_mutex_init and __rtld_malloc_init_real to activate the full + implementations. Those are available only if libc.so is there, + so skip these initialization steps if libc.so is absent. Without + libc.so, the global scope can be completely empty. This can cause + ld.so self-relocation to fail because if it uses symbol-based + relocations, which is why the second ld.so self-relocation is not + performed if libc.so is missing. + + The previous concern regarding GOT updates through self-relocation + no longer applies because function pointers are updated + explicitly through __rtld_mutex_init and __rtld_malloc_init_real, + and not through relocation. However, the second ld.so self-relocation + is still delayed, in case there are other symbols being used. + + Fixes commit 8f8dd904c4a2207699bb666f30acceb5209c8d3f (“elf: + rtld_multiple_ref is always true”). + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 706209867f1ba89c458033408d419e92d8055f58) + +diff --git a/elf/Makefile b/elf/Makefile +index 91fd05c9c0e6084a..59de78a5d45bced4 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -3161,3 +3161,20 @@ tst-rtld-no-malloc-audit-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + + # Any shared object should do. + tst-rtld-no-malloc-preload-ENV = LD_PRELOAD=$(objpfx)tst-auditmod1.so ++ ++# These rules link and run the special elf/tst-nolink-libc-* tests if ++# a port adds them to the tests variables. Neither test variant is ++# linked against libc.so, but tst-nolink-libc-1 is linked against ++# ld.so. The test is always run directly, not under the dynamic ++# linker. ++CFLAGS-tst-nolink-libc.c += $(no-stack-protector) ++$(objpfx)tst-nolink-libc-1: $(objpfx)tst-nolink-libc.o $(objpfx)ld.so ++ $(LINK.o) -nostdlib -nostartfiles -o $@ $< \ ++ -Wl,--dynamic-linker=$(objpfx)ld.so,--no-as-needed $(objpfx)ld.so ++$(objpfx)tst-nolink-libc-1.out: $(objpfx)tst-nolink-libc-1 $(objpfx)ld.so ++ $< > $@ 2>&1; $(evaluate-test) ++$(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o ++ $(LINK.o) -nostdlib -nostartfiles -o $@ $< \ ++ -Wl,--dynamic-linker=$(objpfx)ld.so ++$(objpfx)tst-nolink-libc-2.out: $(objpfx)tst-nolink-libc-2 $(objpfx)ld.so ++ $< > $@ 2>&1; $(evaluate-test) +diff --git a/elf/rtld.c b/elf/rtld.c +index ff938186738e8a87..809fc807989b285e 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2290,25 +2290,25 @@ dl_main (const ElfW(Phdr) *phdr, + + _rtld_main_check (main_map, _dl_argv[0]); + +- /* Now we have all the objects loaded. Relocate them all except for +- the dynamic linker itself. We do this in reverse order so that copy +- relocs of earlier objects overwrite the data written by later +- objects. We do not re-relocate the dynamic linker itself in this +- loop because that could result in the GOT entries for functions we +- call being changed, and that would break us. It is safe to relocate +- the dynamic linker out of order because it has no copy relocations. +- Likewise for libc, which is relocated early to ensure that IFUNC +- resolvers in libc work. */ ++ /* Now we have all the objects loaded. */ + + int consider_profiling = GLRO(dl_profile) != NULL; + + /* If we are profiling we also must do lazy reloaction. */ + GLRO(dl_lazy) |= consider_profiling; + ++ /* If libc.so has been loaded, relocate it early, after the dynamic ++ loader itself. The initial self-relocation of ld.so should be ++ sufficient for IFUNC resolvers in libc.so. */ + if (GL(dl_ns)[LM_ID_BASE].libc_map != NULL) +- _dl_relocate_object (GL(dl_ns)[LM_ID_BASE].libc_map, +- GL(dl_ns)[LM_ID_BASE].libc_map->l_scope, +- GLRO(dl_lazy) ? RTLD_LAZY : 0, consider_profiling); ++ { ++ RTLD_TIMING_VAR (start); ++ rtld_timer_start (&start); ++ _dl_relocate_object (GL(dl_ns)[LM_ID_BASE].libc_map, ++ GL(dl_ns)[LM_ID_BASE].libc_map->l_scope, ++ GLRO(dl_lazy) ? RTLD_LAZY : 0, consider_profiling); ++ rtld_timer_accum (&relocate_time, start); ++ } + + RTLD_TIMING_VAR (start); + rtld_timer_start (&start); +@@ -2331,9 +2331,8 @@ dl_main (const ElfW(Phdr) *phdr, + /* Also allocated with the fake malloc(). */ + l->l_free_initfini = 0; + +- if (l != &GL(dl_rtld_map)) +- _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy) ? RTLD_LAZY : 0, +- consider_profiling); ++ _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy) ? RTLD_LAZY : 0, ++ consider_profiling); + + /* Add object to slot information data if necessasy. */ + if (l->l_tls_blocksize != 0 && __rtld_tls_init_tp_called) +@@ -2371,27 +2370,22 @@ dl_main (const ElfW(Phdr) *phdr, + /* Set up the object lookup structures. */ + _dl_find_object_init (); + +- /* Likewise for the locking implementation. */ +- __rtld_mutex_init (); +- +- /* Re-relocate ourselves with user-controlled symbol definitions. */ +- +- { +- RTLD_TIMING_VAR (start); +- rtld_timer_start (&start); +- +- _dl_relocate_object_no_relro (&GL(dl_rtld_map), main_map->l_scope, 0, 0); +- +- /* The malloc implementation has been relocated, so resolving +- its symbols (and potentially calling IFUNC resolvers) is safe +- at this point. */ +- __rtld_malloc_init_real (main_map); ++ /* If libc.so was loaded, relocate ld.so against it. Complete ld.so ++ initialization with mutex symbols from libc.so and malloc symbols ++ from the global scope. */ ++ if (GL(dl_ns)[LM_ID_BASE].libc_map != NULL) ++ { ++ RTLD_TIMING_VAR (start); ++ rtld_timer_start (&start); ++ _dl_relocate_object_no_relro (&GL(dl_rtld_map), main_map->l_scope, 0, 0); ++ rtld_timer_accum (&relocate_time, start); + +- if (GL(dl_rtld_map).l_relro_size != 0) +- _dl_protect_relro (&GL(dl_rtld_map)); ++ __rtld_mutex_init (); ++ __rtld_malloc_init_real (main_map); ++ } + +- rtld_timer_accum (&relocate_time, start); +- } ++ /* All ld.so initialization is complete. Apply RELRO. */ ++ _dl_protect_relro (&GL(dl_rtld_map)); + + /* Relocation is complete. Perform early libc initialization. This + is the initial libc, even if audit modules have been loaded with +diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile +index b0daa44b95db3b72..a4b692febb3e87d9 100644 +--- a/sysdeps/unix/sysv/linux/Makefile ++++ b/sysdeps/unix/sysv/linux/Makefile +@@ -657,7 +657,15 @@ install-bin += \ + # install-bin + + $(objpfx)pldd: $(objpfx)xmalloc.o ++ ++test-internal-extras += tst-nolink-libc ++ifeq ($(run-built-tests),yes) ++tests-special += \ ++ $(objpfx)tst-nolink-libc-1.out \ ++ $(objpfx)tst-nolink-libc-2.out \ ++ # tests-special + endif ++endif # $(subdir) == elf + + ifeq ($(subdir),rt) + CFLAGS-mq_send.c += -fexceptions +diff --git a/sysdeps/unix/sysv/linux/arm/Makefile b/sysdeps/unix/sysv/linux/arm/Makefile +index a73c897f43c9a206..e73ce4f81114e789 100644 +--- a/sysdeps/unix/sysv/linux/arm/Makefile ++++ b/sysdeps/unix/sysv/linux/arm/Makefile +@@ -1,5 +1,8 @@ + ifeq ($(subdir),elf) + sysdep-rtld-routines += aeabi_read_tp libc-do-syscall ++# The test uses INTERNAL_SYSCALL_CALL. In thumb mode, this uses ++# an undefined reference to __libc_do_syscall. ++CFLAGS-tst-nolink-libc.c += -marm + endif + + ifeq ($(subdir),misc) +diff --git a/sysdeps/unix/sysv/linux/tst-nolink-libc.c b/sysdeps/unix/sysv/linux/tst-nolink-libc.c +new file mode 100644 +index 0000000000000000..817f37784b4080f9 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/tst-nolink-libc.c +@@ -0,0 +1,25 @@ ++/* Test program not linked against libc.so and not using any glibc functions. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++void ++_start (void) ++{ ++ INTERNAL_SYSCALL_CALL (exit_group, 0); ++} diff --git a/glibc-upstream-2.39-246.patch b/glibc-upstream-2.39-246.patch new file mode 100644 index 0000000..ef0e216 --- /dev/null +++ b/glibc-upstream-2.39-246.patch @@ -0,0 +1,283 @@ +commit 79d84b5da58ee989fdabf34767e1501a4b222194 +Author: Florian Weimer +Date: Fri Mar 7 17:37:50 2025 +0100 + + elf: Fix handling of symbol versions which hash to zero (bug 29190) + + This was found through code inspection. No application impact is + known. + + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 46d31980943d8be2f421c1e3276b265c7552636e) + +diff --git a/elf/Makefile b/elf/Makefile +index 59de78a5d45bced4..10c54be629124a17 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -488,6 +488,7 @@ tests += \ + tst-unique2 \ + tst-unwind-ctor \ + tst-unwind-main \ ++ tst-version-hash-zero \ + unload3 \ + unload4 \ + unload5 \ +@@ -979,6 +980,9 @@ modules-names += \ + tst-unique2mod1 \ + tst-unique2mod2 \ + tst-unwind-ctor-lib \ ++ tst-version-hash-zero-linkmod \ ++ tst-version-hash-zero-mod \ ++ tst-version-hash-zero-refmod \ + unload2dep \ + unload2mod \ + unload3mod1 \ +@@ -3178,3 +3182,20 @@ $(objpfx)tst-nolink-libc-2: $(objpfx)tst-nolink-libc.o + -Wl,--dynamic-linker=$(objpfx)ld.so + $(objpfx)tst-nolink-libc-2.out: $(objpfx)tst-nolink-libc-2 $(objpfx)ld.so + $< > $@ 2>&1; $(evaluate-test) ++ ++$(objpfx)tst-version-hash-zero.out: \ ++ $(objpfx)tst-version-hash-zero-mod.so \ ++ $(objpfx)tst-version-hash-zero-refmod.so ++LDFLAGS-tst-version-hash-zero-mod.so = \ ++ -Wl,--version-script=tst-version-hash-zero-mod.map ++# The run-time test module tst-version-hash-zero-refmod.so is linked ++# to a stub module, tst-version-hash-zero-linkmod.so, to produce an ++# expected relocation error. ++$(objpfx)tst-version-hash-zero-refmod.so: \ ++ $(objpfx)tst-version-hash-zero-linkmod.so ++LDFLAGS-tst-version-hash-zero-linkmod.so = \ ++ -Wl,--version-script=tst-version-hash-zero-linkmod.map \ ++ -Wl,--soname=tst-version-hash-zero-mod.so ++$(objpfx)tst-version-hash-zero-refmod.so: \ ++ $(objpfx)tst-version-hash-zero-linkmod.so ++tst-version-hash-zero-refmod.so-no-z-defs = yes +diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c +index 19ad2a25c5f70326..7a70f1df2d6cf839 100644 +--- a/elf/dl-lookup.c ++++ b/elf/dl-lookup.c +@@ -113,12 +113,22 @@ check_match (const char *const undef_name, + /* We can match the version information or use the + default one if it is not hidden. */ + ElfW(Half) ndx = verstab[symidx] & 0x7fff; +- if ((map->l_versions[ndx].hash != version->hash +- || strcmp (map->l_versions[ndx].name, version->name)) +- && (version->hidden || map->l_versions[ndx].hash +- || (verstab[symidx] & 0x8000))) +- /* It's not the version we want. */ +- return NULL; ++ if (map->l_versions[ndx].hash == version->hash ++ && strcmp (map->l_versions[ndx].name, version->name) == 0) ++ /* This is an exact version match. Return the symbol below. */ ++ ; ++ else ++ { ++ if (!version->hidden ++ && map->l_versions[ndx].name[0] == '\0' ++ && (verstab[symidx] & 0x8000) == 0 ++ && (*num_versions)++ == 0) ++ /* This is the global default version. Store it as a ++ fallback match. */ ++ *versioned_sym = sym; ++ ++ return NULL; ++ } + } + } + else +diff --git a/elf/dl-version.c b/elf/dl-version.c +index 8966d612cc79f0f1..708b1c94ea47d147 100644 +--- a/elf/dl-version.c ++++ b/elf/dl-version.c +@@ -357,6 +357,13 @@ _dl_check_map_versions (struct link_map *map, int verbose, int trace_mode) + ent = (ElfW(Verdef) *) ((char *) ent + ent->vd_next); + } + } ++ ++ /* The empty string has ELF hash zero. This avoids a NULL check ++ before the version string comparison in check_match in ++ dl-lookup.c. */ ++ for (unsigned int i = 0; i < map->l_nversions; ++i) ++ if (map->l_versions[i].name == NULL) ++ map->l_versions[i].name = ""; + } + + /* When there is a DT_VERNEED entry with libc.so on DT_NEEDED, issue +diff --git a/elf/tst-version-hash-zero-linkmod.c b/elf/tst-version-hash-zero-linkmod.c +new file mode 100644 +index 0000000000000000..15e2506d0111bc7e +--- /dev/null ++++ b/elf/tst-version-hash-zero-linkmod.c +@@ -0,0 +1,22 @@ ++/* Stub module for linking tst-version-hash-zero-refmod.so. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* The version script assigns a different symbol version for the stub ++ module. Loading the module with the incorrect version is expected ++ to fail. */ ++#include "tst-version-hash-zero-mod.c" +diff --git a/elf/tst-version-hash-zero-linkmod.map b/elf/tst-version-hash-zero-linkmod.map +new file mode 100644 +index 0000000000000000..2dba7c22d7ea7d09 +--- /dev/null ++++ b/elf/tst-version-hash-zero-linkmod.map +@@ -0,0 +1,7 @@ ++Base { ++ local: *; ++}; ++ ++OTHER_VERSION { ++ global: global_variable; ++} Base; +diff --git a/elf/tst-version-hash-zero-mod.c b/elf/tst-version-hash-zero-mod.c +new file mode 100644 +index 0000000000000000..ac6b0dc4a57b5775 +--- /dev/null ++++ b/elf/tst-version-hash-zero-mod.c +@@ -0,0 +1,20 @@ ++/* Test module with a zero version symbol hash. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* The symbol version is assigned by version script. */ ++int global_variable; +diff --git a/elf/tst-version-hash-zero-mod.map b/elf/tst-version-hash-zero-mod.map +new file mode 100644 +index 0000000000000000..41eaff79147a8fcd +--- /dev/null ++++ b/elf/tst-version-hash-zero-mod.map +@@ -0,0 +1,13 @@ ++Base { ++ local: *; ++}; ++ ++/* Define the version so that tst-version-hash-zero-refmod.so passes ++ the initial symbol version check. */ ++OTHER_VERSION { ++} Base; ++ ++/* This version string hashes to zero. */ ++PPPPPPPPPPPP { ++ global: global_variable; ++} Base; +diff --git a/elf/tst-version-hash-zero-refmod.c b/elf/tst-version-hash-zero-refmod.c +new file mode 100644 +index 0000000000000000..cd8b3dcef5b82012 +--- /dev/null ++++ b/elf/tst-version-hash-zero-refmod.c +@@ -0,0 +1,23 @@ ++/* Test module that triggers a relocation failure in tst-version-hash-zero. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++/* This is bound to global_variable@@OTHER_VERSION via ++ tst-version-hash-zero-linkmod.so, but at run time, only ++ global_variable@PPPPPPPPPPPP exists. */ ++extern int global_variable; ++int *pointer_variable = &global_variable; +diff --git a/elf/tst-version-hash-zero.c b/elf/tst-version-hash-zero.c +new file mode 100644 +index 0000000000000000..66a0db4f51fa0e10 +--- /dev/null ++++ b/elf/tst-version-hash-zero.c +@@ -0,0 +1,56 @@ ++/* Symbols with version hash zero should not match any version (bug 29190). ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ void *handle = xdlopen ("tst-version-hash-zero-mod.so", RTLD_NOW); ++ ++ /* This used to crash because some struct r_found_version entries ++ with hash zero did not have valid version strings. */ ++ TEST_VERIFY (xdlvsym (handle, "global_variable", "PPPPPPPPPPPP") != NULL); ++ ++ /* Consistency check. */ ++ TEST_VERIFY (xdlsym (handle, "global_variable") ++ == xdlvsym (handle, "global_variable", "PPPPPPPPPPPP")); ++ ++ /* This symbol version is supposed to be missing. */ ++ TEST_VERIFY (dlvsym (handle, "global_variable", "OTHER_VERSION") == NULL); ++ ++ /* tst-version-hash-zero-refmod.so references ++ global_variable@@OTHER_VERSION and is expected to fail to load. ++ dlvsym sets the hidden flag during lookup. Relocation does not, ++ so this exercises a different failure case. */ ++ TEST_VERIFY_EXIT (dlopen ("tst-version-hash-zero-refmod.so", RTLD_NOW) ++ == NULL); ++ const char *message = dlerror (); ++ if (strstr (message, ++ ": undefined symbol: global_variable, version OTHER_VERSION") ++ == NULL) ++ FAIL_EXIT1 ("unexpected dlopen failure: %s", message); ++ ++ xdlclose (handle); ++ return 0; ++} ++ ++#include diff --git a/glibc-upstream-2.39-247.patch b/glibc-upstream-2.39-247.patch new file mode 100644 index 0000000..c179dc4 --- /dev/null +++ b/glibc-upstream-2.39-247.patch @@ -0,0 +1,217 @@ +commit 24c94ea84e9323dc24ce11f34368531f75eb9a72 +Author: Florian Weimer +Date: Tue Mar 11 15:30:52 2025 +0100 + + elf: Test dlopen (NULL, RTLD_LAZY) from an ELF constructor + + This call must not complete initialization of all shared objects + in the global scope because the ELF constructor which makes the call + likely has not finished initialization. Calling more constructors + at this point would expose those to a partially constructed + dependency. + + This completes the revert of commit 9897ced8e78db5d813166a7ccccfd5a + ("elf: Run constructors on cyclic recursive dlopen (bug 31986)"). + + (cherry picked from commit d604f9c500570e80febfcc6a52b63a002b466f35) + +diff --git a/elf/Makefile b/elf/Makefile +index 10c54be629124a17..a102373793fd16bd 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -415,6 +415,7 @@ tests += \ + tst-dlmopen3 \ + tst-dlmopen4 \ + tst-dlopen-auditdup \ ++ tst-dlopen-constructor-null \ + tst-dlopen-self \ + tst-dlopen-tlsmodid \ + tst-dlopen-tlsreinit1 \ +@@ -865,6 +866,8 @@ modules-names += \ + tst-dlmopen1mod \ + tst-dlopen-auditdup-auditmod \ + tst-dlopen-auditdupmod \ ++ tst-dlopen-constructor-null-mod1 \ ++ tst-dlopen-constructor-null-mod2 \ + tst-dlopen-sgid-mod \ + tst-dlopen-tlsreinitmod1 \ + tst-dlopen-tlsreinitmod2 \ +@@ -3199,3 +3202,9 @@ LDFLAGS-tst-version-hash-zero-linkmod.so = \ + $(objpfx)tst-version-hash-zero-refmod.so: \ + $(objpfx)tst-version-hash-zero-linkmod.so + tst-version-hash-zero-refmod.so-no-z-defs = yes ++ ++$(objpfx)tst-dlopen-constructor-null: \ ++ $(objpfx)tst-dlopen-constructor-null-mod1.so \ ++ $(objpfx)tst-dlopen-constructor-null-mod2.so ++$(objpfx)tst-dlopen-constructor-null-mod2.so: \ ++ $(objpfx)tst-dlopen-constructor-null-mod1.so +diff --git a/elf/dl-open.c b/elf/dl-open.c +index b00f283c42d19adb..80f084d5c838fc1c 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -605,6 +605,16 @@ dl_open_worker_begin (void *a) + if ((mode & RTLD_GLOBAL) && new->l_global == 0) + add_to_global_update (new); + ++ /* It is not possible to run the ELF constructor for the new ++ link map if it has not executed yet: If this dlopen call came ++ from an ELF constructor that has not put that object into a ++ consistent state, completing initialization for the entire ++ scope will expose objects that have this partially ++ constructed object among its dependencies to this ++ inconsistent state. This could happen even with a benign ++ dlopen (NULL, RTLD_LAZY) call from a constructor of an ++ initially loaded shared object. */ ++ + return; + } + +diff --git a/elf/tst-dlopen-constructor-null-mod1.c b/elf/tst-dlopen-constructor-null-mod1.c +new file mode 100644 +index 0000000000000000..70a7a0ad46a1a666 +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null-mod1.c +@@ -0,0 +1,55 @@ ++/* Module calling dlopen (NULL, RTLD_LAZY) to obtain the global scope. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++int mod1_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ puts ("info: tst-dlopen-constructor-null-mod1.so constructor"); ++ ++ void *handle = dlopen (NULL, RTLD_LAZY); ++ if (handle == NULL) ++ { ++ printf ("error: %s\n", dlerror ()); ++ exit (1); ++ } ++ puts ("info: dlopen returned"); ++ if (dlsym (handle, "malloc") != malloc) ++ { ++ puts ("error: dlsym did not produce expected result"); ++ exit (1); ++ } ++ dlclose (handle); ++ ++ /* Check that the second module's constructor has not executed. */ ++ if (getenv ("mod2_status") != NULL) ++ { ++ printf ("error: mod2_status environment variable set: %s\n", ++ getenv ("mod2_status")); ++ exit (1); ++ } ++ ++ /* Communicate to the second module that the constructor executed. */ ++ mod1_status = 1; ++} +diff --git a/elf/tst-dlopen-constructor-null-mod2.c b/elf/tst-dlopen-constructor-null-mod2.c +new file mode 100644 +index 0000000000000000..d6e945beaec04815 +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null-mod2.c +@@ -0,0 +1,37 @@ ++/* Module whose constructor should not be invoked by dlopen (NULL, RTLD_LAZY). ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++extern int mod1_status; ++int mod2_status; ++ ++static void __attribute__ ((constructor)) ++init (void) ++{ ++ printf ("info: tst-dlopen-constructor-null-mod2.so constructor" ++ " (mod1_status=%d)", mod1_status); ++ if (!(mod1_status == 1 && mod2_status == 0)) ++ { ++ puts ("error: mod1_status == 1 && mod2_status == 0 expected"); ++ exit (1); ++ } ++ setenv ("mod2_status", "constructed", 1); ++ mod2_status = 1; ++} +diff --git a/elf/tst-dlopen-constructor-null.c b/elf/tst-dlopen-constructor-null.c +new file mode 100644 +index 0000000000000000..db90643325c5235f +--- /dev/null ++++ b/elf/tst-dlopen-constructor-null.c +@@ -0,0 +1,38 @@ ++/* Verify that dlopen (NULL, RTLD_LAZY) does not complete initialization. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This test mimics what the glvndSetupPthreads function in libglvnd ++ does. */ ++ ++#include ++#include ++ ++/* Defined and initialized in the shared objects. */ ++extern int mod1_status; ++extern int mod2_status; ++ ++static int ++do_test (void) ++{ ++ TEST_COMPARE (mod1_status, 1); ++ TEST_COMPARE (mod2_status, 1); ++ TEST_COMPARE_STRING (getenv ("mod2_status"), "constructed"); ++ return 0; ++} ++ ++#include diff --git a/glibc-upstream-2.39-248.patch b/glibc-upstream-2.39-248.patch new file mode 100644 index 0000000..b270bf2 --- /dev/null +++ b/glibc-upstream-2.39-248.patch @@ -0,0 +1,159 @@ +commit 5601ad79b75f03db3e30fc3358e63c1122985f95 +Author: Florian Weimer +Date: Fri Jul 4 21:46:05 2025 +0200 + + elf: Introduce separate _r_debug_array variable + + It replaces the ns_debug member of the namespaces. Previously, + the base namespace had an unused ns_debug member. + + This change also fixes a concurrency issue: Now _dl_debug_initialize + only updates r_next of the previous namespace's r_debug after the new + r_debug is initialized, so that only the initialized version is + observed. (Client code accessing _r_debug will benefit from load + dependency tracking in CPUs even without explicit barriers.) + + Reviewed-by: H.J. Lu + (cherry picked from commit 7278d11f3a0cd528188c719bab75575b0aea2c6e) + +diff --git a/elf/dl-debug.c b/elf/dl-debug.c +index ef56de7a29565652..5c49fa847e91bd81 100644 +--- a/elf/dl-debug.c ++++ b/elf/dl-debug.c +@@ -30,17 +30,37 @@ extern const int verify_link_map_members[(VERIFY_MEMBER (l_addr) + && VERIFY_MEMBER (l_prev)) + ? 1 : -1]; + ++#ifdef SHARED ++/* r_debug structs for secondary namespaces. The first namespace is ++ handled separately because its r_debug structure must overlap with ++ the public _r_debug symbol, so the first array element corresponds ++ to LM_ID_BASE + 1. See elf/dl-debug-symbols.S. */ ++struct r_debug_extended _r_debug_array[DL_NNS - 1]; ++ ++/* Return the r_debug object for the namespace NS. */ ++static inline struct r_debug_extended * ++get_rdebug (Lmid_t ns) ++{ ++ if (ns == LM_ID_BASE) ++ return &_r_debug_extended; ++ else ++ return &_r_debug_array[ns - 1]; ++} ++#else /* !SHARED */ ++static inline struct r_debug_extended * ++get_rdebug (Lmid_t ns) ++{ ++ return &_r_debug_extended; /* There is just one namespace. */ ++} ++#endif /* !SHARED */ ++ + /* Update the `r_map' member and return the address of `struct r_debug' + of the namespace NS. */ + + struct r_debug * + _dl_debug_update (Lmid_t ns) + { +- struct r_debug_extended *r; +- if (ns == LM_ID_BASE) +- r = &_r_debug_extended; +- else +- r = &GL(dl_ns)[ns]._ns_debug; ++ struct r_debug_extended *r = get_rdebug (ns); + if (r->base.r_map == NULL) + atomic_store_release (&r->base.r_map, + (void *) GL(dl_ns)[ns]._ns_loaded); +@@ -54,34 +74,7 @@ _dl_debug_update (Lmid_t ns) + struct r_debug * + _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + { +- struct r_debug_extended *r, **pp = NULL; +- +- if (ns == LM_ID_BASE) +- { +- r = &_r_debug_extended; +- /* Initialize r_version to 1. */ +- if (_r_debug_extended.base.r_version == 0) +- _r_debug_extended.base.r_version = 1; +- } +- else if (DL_NNS > 1) +- { +- r = &GL(dl_ns)[ns]._ns_debug; +- if (r->base.r_brk == 0) +- { +- /* Add the new namespace to the linked list. After a namespace +- is initialized, r_brk becomes non-zero. A namespace becomes +- empty (r_map == NULL) when it is unused. But it is never +- removed from the linked list. */ +- struct r_debug_extended *p; +- for (pp = &_r_debug_extended.r_next; +- (p = *pp) != NULL; +- pp = &p->r_next) +- ; +- +- r->base.r_version = 2; +- } +- } +- ++ struct r_debug_extended *r = get_rdebug (ns); + if (r->base.r_brk == 0) + { + /* Tell the debugger where to find the map of loaded objects. +@@ -89,20 +82,36 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + only once. */ + r->base.r_ldbase = ldbase ?: _r_debug_extended.base.r_ldbase; + r->base.r_brk = (ElfW(Addr)) &_dl_debug_state; +- r->r_next = NULL; ++ ++#ifdef SHARED ++ /* Add the new namespace to the linked list. This assumes that ++ namespaces are allocated in increasing order. After a ++ namespace is initialized, r_brk becomes non-zero. A ++ namespace becomes empty (r_map == NULL) when it is unused. ++ But it is never removed from the linked list. */ ++ ++ if (ns != LM_ID_BASE) ++ { ++ r->base.r_version = 2; ++ if (ns - 1 == LM_ID_BASE) ++ { ++ atomic_store_release (&_r_debug_extended.r_next, r); ++ /* Now there are multiple namespaces. */ ++ atomic_store_release (&_r_debug_extended.base.r_version, 2); ++ } ++ else ++ /* Update r_debug_extended of the previous namespace. */ ++ atomic_store_release (&_r_debug_array[ns - 2].r_next, r); ++ } ++ else ++#endif /* SHARED */ ++ r->base.r_version = 1; + } + + if (r->base.r_map == NULL) + atomic_store_release (&r->base.r_map, + (void *) GL(dl_ns)[ns]._ns_loaded); + +- if (pp != NULL) +- { +- atomic_store_release (pp, r); +- /* Bump r_version to 2 for the new namespace. */ +- atomic_store_release (&_r_debug_extended.base.r_version, 2); +- } +- + return &r->base; + } + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 91447a5e77c2466d..8b9ae3783056a29f 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -350,8 +350,6 @@ struct rtld_global + size_t n_elements; + void (*free) (void *); + } _ns_unique_sym_table; +- /* Keep track of changes to each namespace' list. */ +- struct r_debug_extended _ns_debug; + } _dl_ns[DL_NNS]; + /* One higher than index of last used namespace. */ + EXTERN size_t _dl_nns; diff --git a/glibc-upstream-2.39-249.patch b/glibc-upstream-2.39-249.patch new file mode 100644 index 0000000..fdfb7a2 --- /dev/null +++ b/glibc-upstream-2.39-249.patch @@ -0,0 +1,149 @@ +commit 97017da5ef946c6d38c252f56c8cb7c205b732fa +Author: Florian Weimer +Date: Fri Jul 4 21:46:16 2025 +0200 + + elf: Introduce _dl_debug_change_state + + It combines updating r_state with the debugger notification. + + The second change to _dl_open introduces an additional debugger + notification for dlmopen, but debuggers are expected to ignore it. + + Reviewed-by: H.J. Lu + (cherry picked from commit 8329939a37f483a16013dd8af8303cbcb86d92cb) + +diff --git a/elf/dl-close.c b/elf/dl-close.c +index 4c963097f4dc8d79..fb27a1231c1c5b66 100644 +--- a/elf/dl-close.c ++++ b/elf/dl-close.c +@@ -433,8 +433,7 @@ _dl_close_worker (struct link_map *map, bool force) + /* Notify the debugger we are about to remove some loaded objects. + LA_ACT_DELETE has already been signalled above for !unload_any. */ + struct r_debug *r = _dl_debug_update (nsid); +- r->r_state = RT_DELETE; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_DELETE); + LIBC_PROBE (unmap_start, 2, nsid, r); + + if (unload_global) +@@ -726,8 +725,7 @@ _dl_close_worker (struct link_map *map, bool force) + __rtld_lock_unlock_recursive (GL(dl_load_tls_lock)); + + /* Notify the debugger those objects are finalized and gone. */ +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (unmap_complete, 2, nsid, r); + + #ifdef SHARED +diff --git a/elf/dl-debug.c b/elf/dl-debug.c +index 5c49fa847e91bd81..b3777ffc136469cf 100644 +--- a/elf/dl-debug.c ++++ b/elf/dl-debug.c +@@ -67,6 +67,13 @@ _dl_debug_update (Lmid_t ns) + return &r->base; + } + ++void ++_dl_debug_change_state (struct r_debug *r, int state) ++{ ++ atomic_store_release (&r->r_state, state); ++ _dl_debug_state (); ++} ++ + /* Initialize _r_debug_extended for the namespace NS. LDBASE is the + run-time load address of the dynamic linker, to be put in + _r_debug_extended.r_ldbase. Return the address of _r_debug. */ +diff --git a/elf/dl-load.c b/elf/dl-load.c +index 75a7187c649e0202..8b0890499d66f67a 100644 +--- a/elf/dl-load.c ++++ b/elf/dl-load.c +@@ -947,8 +947,7 @@ _dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l) + /* Notify the debugger we have added some objects. We need to + call _dl_debug_initialize in a static program in case dynamic + linking has not been used before. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_ADD); + LIBC_PROBE (map_start, 2, nsid, r); + } + else +diff --git a/elf/dl-open.c b/elf/dl-open.c +index 80f084d5c838fc1c..6f6d3ddbf94c764c 100644 +--- a/elf/dl-open.c ++++ b/elf/dl-open.c +@@ -792,8 +792,7 @@ dl_open_worker (void *a) + #ifdef SHARED + bool was_not_consistent = r->r_state != RT_CONSISTENT; + #endif +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (map_complete, 3, nsid, r, args->map); + + #ifdef SHARED +@@ -871,7 +870,7 @@ no more namespaces available for dlmopen()")); + } + + GL(dl_ns)[nsid].libc_map = NULL; +- _dl_debug_update (nsid)->r_state = RT_CONSISTENT; ++ _dl_debug_change_state (_dl_debug_update (nsid), RT_CONSISTENT); + } + /* Never allow loading a DSO in a namespace which is empty. Such + direct placements is only causing problems. Also don't allow +diff --git a/elf/rtld.c b/elf/rtld.c +index 809fc807989b285e..43bfc7378afc6cc7 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -1850,8 +1850,7 @@ dl_main (const ElfW(Phdr) *phdr, + elf_setup_debug_entry (main_map, r); + + /* We start adding objects. */ +- r->r_state = RT_ADD; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_ADD); + LIBC_PROBE (init_start, 2, LM_ID_BASE, r); + + /* Auditing checkpoint: we are ready to signal that the initial map +@@ -2402,8 +2401,7 @@ dl_main (const ElfW(Phdr) *phdr, + /* Notify the debugger all new objects are now ready to go. We must re-get + the address since by now the variable might be in another object. */ + r = _dl_debug_update (LM_ID_BASE); +- r->r_state = RT_CONSISTENT; +- _dl_debug_state (); ++ _dl_debug_change_state (r, RT_CONSISTENT); + LIBC_PROBE (init_complete, 2, LM_ID_BASE, r); + + /* Auditing checkpoint: we have added all objects. */ +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 8b9ae3783056a29f..017406e7fa93c941 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1067,8 +1067,14 @@ extern void _dl_debug_state (void); + rtld_hidden_proto (_dl_debug_state) + + /* Initialize `struct r_debug_extended' for the namespace NS. LDBASE +- is the run-time load address of the dynamic linker, to be put in the +- `r_ldbase' member. Return the address of the structure. */ ++ is the run-time load address of the dynamic linker, to be put in ++ the `r_ldbase' member. ++ ++ Return the address of the r_debug structure for the namespace. ++ This is not merely a convenience or optimization, but it is ++ necessary for the LIBC_PROBE Systemtap/debugger probes to work ++ reliably: direct variable access can create probes that tools ++ cannot consume. */ + extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + attribute_hidden; + +@@ -1076,6 +1082,10 @@ extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + of the namespace NS. */ + extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden; + ++/* Update R->r_state to STATE and notify the debugger by calling ++ _dl_debug_state. */ ++void _dl_debug_change_state (struct r_debug *r, int state) attribute_hidden; ++ + /* Initialize the basic data structure for the search paths. SOURCE + is either "LD_LIBRARY_PATH" or "--library-path". + GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to diff --git a/glibc-upstream-2.39-250.patch b/glibc-upstream-2.39-250.patch new file mode 100644 index 0000000..bede83b --- /dev/null +++ b/glibc-upstream-2.39-250.patch @@ -0,0 +1,252 @@ +commit 5cd1f4b1a1eaf7774821d81bbd0222d80a927db2 +Author: Florian Weimer +Date: Fri Jul 4 21:46:30 2025 +0200 + + elf: Restore support for _r_debug interpositions and copy relocations + + The changes in commit a93d9e03a31ec14405cb3a09aa95413b67067380 + ("Extend struct r_debug to support multiple namespaces [BZ #15971]") + break the dyninst dynamic instrumentation tool. It brings its + own definition of _r_debug (rather than a declaration). + + Furthermore, it turns out it is rather hard to use the proposed + handshake for accessing _r_debug via DT_DEBUG. If applications want + to access _r_debug, they can do so directly if the relevant code has + been built as PIC. To protect against harm from accidental copy + relocations due to linker relaxations, this commit restores copy + relocation support by adjusting both copies if interposition or + copy relocations are in play. Therefore, it is possible to + use a hidden reference in ld.so to access _r_debug. + + Only perform the copy relocation initialization if libc has been + loaded. Otherwise, the ld.so search scope can be empty, and the + lookup of the _r_debug symbol mail fail. + + Reviewed-by: H.J. Lu + (cherry picked from commit ea85e7d55087075376a29261e722e4fae14ecbe7) + +diff --git a/elf/Makefile b/elf/Makefile +index a102373793fd16bd..34933bb15f2d8228 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -414,6 +414,8 @@ tests += \ + tst-dlmopen1 \ + tst-dlmopen3 \ + tst-dlmopen4 \ ++ tst-dlmopen4-nonpic \ ++ tst-dlmopen4-pic \ + tst-dlopen-auditdup \ + tst-dlopen-constructor-null \ + tst-dlopen-self \ +@@ -2062,6 +2064,13 @@ $(objpfx)tst-dlmopen3.out: $(objpfx)tst-dlmopen1mod.so + + $(objpfx)tst-dlmopen4.out: $(objpfx)tst-dlmopen1mod.so + ++CFLAGS-tst-dlmopen4-pic.c += -fPIC ++$(objpfx)tst-dlmopen4-pic.out: $(objpfx)tst-dlmopen1mod.so ++ ++CFLAGS-tst-dlmopen4-nonpic.c += -fno-pie ++tst-dlmopen4-nonpic-no-pie = yes ++$(objpfx)tst-dlmopen4-nonpic.out: $(objpfx)tst-dlmopen1mod.so ++ + $(objpfx)tst-audit1.out: $(objpfx)tst-auditmod1.so + tst-audit1-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so + +diff --git a/elf/dl-debug-symbols.S b/elf/dl-debug-symbols.S +index 4e35adef5de5db33..33f0fc77de503aea 100644 +--- a/elf/dl-debug-symbols.S ++++ b/elf/dl-debug-symbols.S +@@ -38,3 +38,4 @@ + _r_debug: + _r_debug_extended: + .zero R_DEBUG_EXTENDED_SIZE ++rtld_hidden_def (_r_debug) +diff --git a/elf/dl-debug.c b/elf/dl-debug.c +index b3777ffc136469cf..8b513323091402db 100644 +--- a/elf/dl-debug.c ++++ b/elf/dl-debug.c +@@ -16,6 +16,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + + +@@ -37,6 +38,37 @@ extern const int verify_link_map_members[(VERIFY_MEMBER (l_addr) + to LM_ID_BASE + 1. See elf/dl-debug-symbols.S. */ + struct r_debug_extended _r_debug_array[DL_NNS - 1]; + ++/* If not null, pointer to the _r_debug in the main executable. */ ++static struct r_debug *_r_debug_main; ++ ++void ++_dl_debug_post_relocate (struct link_map *main_map) ++{ ++ /* Perform a full symbol search in all objects, to maintain ++ compatibility if interposed _r_debug definitions. The lookup ++ cannot fail because there is a definition in ld.so, and this ++ function is only called if the ld.so search scope is not empty. */ ++ const ElfW(Sym) *sym = NULL; ++ lookup_t result =_dl_lookup_symbol_x ("_r_debug", main_map, &sym, ++ main_map->l_scope, NULL, 0, 0, NULL); ++ if (sym->st_size >= sizeof (struct r_debug)) ++ { ++ struct r_debug *main_r_debug = DL_SYMBOL_ADDRESS (result, sym); ++ if (main_r_debug != &_r_debug_extended.base) ++ { ++ /* The extended version of the struct is not available in ++ the main executable because a copy relocation has been ++ used. r_map etc. have already been copied as part of the ++ copy relocation processing. */ ++ main_r_debug->r_version = 1; ++ ++ /* Record that dual updates of the initial link map are ++ required. */ ++ _r_debug_main = main_r_debug; ++ } ++ } ++} ++ + /* Return the r_debug object for the namespace NS. */ + static inline struct r_debug_extended * + get_rdebug (Lmid_t ns) +@@ -71,6 +103,11 @@ void + _dl_debug_change_state (struct r_debug *r, int state) + { + atomic_store_release (&r->r_state, state); ++#ifdef SHARED ++ if (r == &_r_debug_extended.base && _r_debug_main != NULL) ++ /* Update the copy-relocation of _r_debug. */ ++ atomic_store_release (&_r_debug_main->r_state, state); ++#endif + _dl_debug_state (); + } + +@@ -103,7 +140,9 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + if (ns - 1 == LM_ID_BASE) + { + atomic_store_release (&_r_debug_extended.r_next, r); +- /* Now there are multiple namespaces. */ ++ /* Now there are multiple namespaces. Note that this ++ deliberately does not update the copy in the main ++ executable (if it exists). */ + atomic_store_release (&_r_debug_extended.base.r_version, 2); + } + else +@@ -116,8 +155,15 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + } + + if (r->base.r_map == NULL) +- atomic_store_release (&r->base.r_map, +- (void *) GL(dl_ns)[ns]._ns_loaded); ++ { ++ struct link_map_public *l = (void *) GL(dl_ns)[ns]._ns_loaded; ++ atomic_store_release (&r->base.r_map, l); ++#ifdef SHARED ++ if (ns == LM_ID_BASE && _r_debug_main != NULL) ++ /* Update the copy-relocation of _r_debug. */ ++ atomic_store_release (&_r_debug_main->r_map, l); ++#endif ++ } + + return &r->base; + } +diff --git a/elf/rtld.c b/elf/rtld.c +index 43bfc7378afc6cc7..cd790e37f2a323a4 100644 +--- a/elf/rtld.c ++++ b/elf/rtld.c +@@ -2381,6 +2381,9 @@ dl_main (const ElfW(Phdr) *phdr, + + __rtld_mutex_init (); + __rtld_malloc_init_real (main_map); ++ ++ /* Update copy-relocated _r_debug if necessary. */ ++ _dl_debug_post_relocate (main_map); + } + + /* All ld.so initialization is complete. Apply RELRO. */ +diff --git a/elf/tst-dlmopen4-nonpic.c b/elf/tst-dlmopen4-nonpic.c +new file mode 100644 +index 0000000000000000..ad4e40995337f4f9 +--- /dev/null ++++ b/elf/tst-dlmopen4-nonpic.c +@@ -0,0 +1,2 @@ ++#define BUILD_FOR_NONPIC ++#include "tst-dlmopen4.c" +diff --git a/elf/tst-dlmopen4-pic.c b/elf/tst-dlmopen4-pic.c +new file mode 100644 +index 0000000000000000..919fa85c2579fb5d +--- /dev/null ++++ b/elf/tst-dlmopen4-pic.c +@@ -0,0 +1,2 @@ ++#define BUILD_FOR_PIC ++#include "tst-dlmopen4.c" +diff --git a/elf/tst-dlmopen4.c b/elf/tst-dlmopen4.c +index b1c5502621ed433d..9e053fbc59c531ae 100644 +--- a/elf/tst-dlmopen4.c ++++ b/elf/tst-dlmopen4.c +@@ -46,6 +46,15 @@ do_test (void) + TEST_COMPARE (debug->base.r_version, 1); + TEST_VERIFY_EXIT (debug->r_next == NULL); + ++#ifdef BUILD_FOR_PIC ++ /* In a PIC build, using _r_debug directly should give us the same ++ object. */ ++ TEST_VERIFY (&_r_debug == &debug->base); ++#endif ++#ifdef BUILD_FOR_NONPIC ++ TEST_COMPARE (_r_debug.r_version, 1); ++#endif ++ + void *h = xdlmopen (LM_ID_NEWLM, "$ORIGIN/tst-dlmopen1mod.so", + RTLD_LAZY); + +@@ -57,6 +66,19 @@ do_test (void) + const char *name = basename (debug->r_next->base.r_map->l_name); + TEST_COMPARE_STRING (name, "tst-dlmopen1mod.so"); + ++#ifdef BUILD_FOR_NONPIC ++ /* If a copy relocation is used, it must be at version 1. */ ++ if (&_r_debug != &debug->base) ++ { ++ TEST_COMPARE (_r_debug.r_version, 1); ++ TEST_COMPARE ((uintptr_t) _r_debug.r_map, ++ (uintptr_t) debug->base.r_map); ++ TEST_COMPARE (_r_debug.r_brk, debug->base.r_brk); ++ TEST_COMPARE (_r_debug.r_state, debug->base.r_state); ++ TEST_COMPARE (_r_debug.r_ldbase, debug->base.r_ldbase); ++ } ++#endif ++ + xdlclose (h); + + return 0; +diff --git a/include/link.h b/include/link.h +index 5ed445d5a6cdf12d..7ca305f7804442f5 100644 +--- a/include/link.h ++++ b/include/link.h +@@ -365,6 +365,8 @@ struct auditstate + dynamic linker. */ + extern struct r_debug_extended _r_debug_extended attribute_hidden; + ++rtld_hidden_proto (_r_debug) ++ + #if __ELF_NATIVE_CLASS == 32 + # define symbind symbind32 + # define LA_SYMBIND "la_symbind32" +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 017406e7fa93c941..043abd369700ad58 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -1078,6 +1078,10 @@ rtld_hidden_proto (_dl_debug_state) + extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + attribute_hidden; + ++/* This is called after relocation processing to handle a potential ++ copy relocation for _r_debug. */ ++void _dl_debug_post_relocate (struct link_map *main_map) attribute_hidden; ++ + /* Update the `r_map' member and return the address of `struct r_debug' + of the namespace NS. */ + extern struct r_debug *_dl_debug_update (Lmid_t ns) attribute_hidden; diff --git a/glibc-upstream-2.39-251.patch b/glibc-upstream-2.39-251.patch new file mode 100644 index 0000000..ede1711 --- /dev/null +++ b/glibc-upstream-2.39-251.patch @@ -0,0 +1,81 @@ +commit cf0e7d512d3c5a5d46da50c0aa023a7f8dc0d560 +Author: Florian Weimer +Date: Mon Jul 28 14:16:52 2025 +0200 + + elf: Compile _dl_debug_state separately (bug 33224) + + This ensures that the compiler will not inline it, so that + debuggers which do not use the Systemtap probes can reliably + set a breakpoint on it. + + Reviewed-by: Andreas K. Huettel + Tested-by: Andreas K. Huettel + (cherry picked from commit 620f0730f311635cd0e175a3ae4d0fc700c76366) + +diff --git a/elf/Makefile b/elf/Makefile +index 34933bb15f2d8228..57726b1034046433 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -57,6 +57,7 @@ dl-routines = \ + dl-close \ + dl-debug \ + dl-debug-symbols \ ++ dl-debug_state \ + dl-deps \ + dl-exception \ + dl-execstack \ +diff --git a/elf/dl-debug.c b/elf/dl-debug.c +index 8b513323091402db..df36d61dcb66dd0f 100644 +--- a/elf/dl-debug.c ++++ b/elf/dl-debug.c +@@ -167,14 +167,3 @@ _dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns) + + return &r->base; + } +- +- +-/* This function exists solely to have a breakpoint set on it by the +- debugger. The debugger is supposed to find this function's address by +- examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks +- for this particular symbol name in the PT_INTERP file. */ +-void +-_dl_debug_state (void) +-{ +-} +-rtld_hidden_def (_dl_debug_state) +diff --git a/elf/dl-debug_state.c b/elf/dl-debug_state.c +new file mode 100644 +index 0000000000000000..40c134a49e2455f3 +--- /dev/null ++++ b/elf/dl-debug_state.c +@@ -0,0 +1,30 @@ ++/* Debugger hook called after dynamic linker updates. ++ Copyright (C) 1996-2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* This function exists solely to have a breakpoint set on it by the ++ debugger. The debugger is supposed to find this function's address by ++ examining the r_brk member of struct r_debug, but GDB 4.15 in fact looks ++ for this particular symbol name in the PT_INTERP file. Therefore, ++ this function must not be inlined. */ ++void ++_dl_debug_state (void) ++{ ++} ++rtld_hidden_def (_dl_debug_state) diff --git a/glibc-upstream-2.39-252.patch b/glibc-upstream-2.39-252.patch new file mode 100644 index 0000000..9ef5193 --- /dev/null +++ b/glibc-upstream-2.39-252.patch @@ -0,0 +1,23 @@ +commit 4c2509882fd9768a067ce8cb7cb40394e1cf3862 +Author: Florian Weimer +Date: Mon Aug 18 13:52:02 2025 +0200 + + elf: Preserve _rtld_global layout for the release branch + + Backporting commit 97017da5ef946c6d38c252f56c8cb7c205b732fa + ("elf: Introduce _dl_debug_change_state") removed the + _ns_debug member. Keep it to preseve struct layout. + +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 043abd369700ad58..b4c6e6d2ca7a1fec 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -350,6 +350,8 @@ struct rtld_global + size_t n_elements; + void (*free) (void *); + } _ns_unique_sym_table; ++ /* Keep track of changes to each namespace' list. */ ++ struct r_debug_extended _ns_debug_unused; + } _dl_ns[DL_NNS]; + /* One higher than index of last used namespace. */ + EXTERN size_t _dl_nns; diff --git a/glibc-upstream-2.39-253.patch b/glibc-upstream-2.39-253.patch new file mode 100644 index 0000000..19ae42a --- /dev/null +++ b/glibc-upstream-2.39-253.patch @@ -0,0 +1,1149 @@ +commit 89596f46e3113bd9643e7d5a90e6f7db11259086 +Author: H.J. Lu +Date: Mon Jun 9 05:22:10 2025 +0800 + + i386: Update ___tls_get_addr to preserve vector registers + + Compiler generates the following instruction sequence for dynamic TLS + access: + + leal tls_var@tlsgd(,%ebx,1), %eax + call ___tls_get_addr@PLT + + CALL instruction is transparent to compiler which assumes all registers, + except for EFLAGS, AX, CX, and DX, are unchanged after CALL. But + ___tls_get_addr is a normal function which doesn't preserve any vector + registers. + + 1. Rename the generic __tls_get_addr function to ___tls_get_addr_internal. + 2. Change ___tls_get_addr to a wrapper function with implementations for + FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector registers. + 3. dl-tlsdesc-dynamic.h has: + + _dl_tlsdesc_dynamic: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + + It is wrong to use + + movl %ebx, -28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + ... + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl -28(%esp), %ebx + + to preserve EBX on stack. Fix it with: + + movl %ebx, 28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + ... + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl 28(%esp), %ebx + + 4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal directly. + 5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with + traditional TLS variant to verify the fix. + 6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h. + + This fixes BZ #32996. + + Co-Authored-By: Adhemerval Zanella + Signed-off-by: H.J. Lu + Reviewed-by: Adhemerval Zanella + (cherry picked from commit 848f0e46f03f22404ed9a8aabf3fd5ce8809a1be) + +diff --git a/configure b/configure +index c25b93dd0b317e4e..36a492ab5053486e 100755 +--- a/configure ++++ b/configure +@@ -4778,6 +4778,9 @@ with_fp_cond=1 + # A preconfigure script may define another name to TLS descriptor variant + mtls_descriptor=gnu2 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 +@@ -7057,6 +7060,39 @@ printf "%s\n" "$libc_cv_mtls_descriptor" >&6; } + config_vars="$config_vars + have-mtls-descriptor = $libc_cv_mtls_descriptor" + ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for traditional tls support" >&5 ++printf %s "checking for traditional tls support... " >&6; } ++if test ${libc_cv_test_mtls_traditional+y} ++then : ++ printf %s "(cached) " >&6 ++else case e in #( ++ e) cat > conftest.c <&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++then ++ libc_cv_test_mtls_traditional=$mtls_traditional ++else ++ libc_cv_test_mtls_traditional=no ++fi ++rm -f conftest* ;; ++esac ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5 ++printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; } ++config_vars="$config_vars ++have-test-mtls-traditional = $libc_cv_test_mtls_traditional" ++ + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5 + printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; } + if test ${libc_cv_wno_ignored_attributes+y} +diff --git a/configure.ac b/configure.ac +index f00fc36f387af09d..c9eb28f7dc8950cc 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -454,6 +454,9 @@ with_fp_cond=1 + # A preconfigure script may define another name to TLS descriptor variant + mtls_descriptor=gnu2 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + dnl Let sysdeps/*/preconfigure act here. + LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) + +@@ -1319,6 +1322,28 @@ rm -f conftest*]) + AC_SUBST(libc_cv_mtls_descriptor) + LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor]) + ++dnl Check if CC supports traditional tls. ++AC_CACHE_CHECK([for traditional tls support], ++ libc_cv_test_mtls_traditional, ++[dnl ++cat > conftest.c <&AS_MESSAGE_LOG_FD]) ++then ++ libc_cv_test_mtls_traditional=$mtls_traditional ++else ++ libc_cv_test_mtls_traditional=no ++fi ++rm -f conftest*]) ++LIBC_CONFIG_VAR([have-test-mtls-traditional], ++ [$libc_cv_test_mtls_traditional]) ++ + dnl clang emits an warning for a double alias redirection, to warn the + dnl original symbol is sed even when weak definition overrides it. + dnl It is a usual pattern for weak_alias, where multiple alias point to +diff --git a/elf/Makefile b/elf/Makefile +index 57726b1034046433..fb35102f827b96cd 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -485,6 +485,7 @@ tests += \ + tst-tls19 \ + tst-tls20 \ + tst-tls21 \ ++ tst-tls23 \ + tst-tlsalign \ + tst-tlsalign-extern \ + tst-tlsgap \ +@@ -956,6 +957,7 @@ modules-names += \ + tst-tls19mod3 \ + tst-tls20mod-bad \ + tst-tls21mod \ ++ tst-tls23-mod \ + tst-tlsalign-lib \ + tst-tlsgap-mod0 \ + tst-tlsgap-mod1 \ +@@ -3136,6 +3138,13 @@ CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor) + CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor) + endif + ++$(objpfx)tst-tls23: $(shared-thread-library) ++$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so ++ ++ifneq (no,$(have-test-mtls-traditional)) ++CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional) ++endif ++ + $(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so + # More objects than DTV_SURPLUS, to trigger DTV reallocation. + $(objpfx)tst-recursive-tls.out: \ +diff --git a/elf/tst-tls23-mod.c b/elf/tst-tls23-mod.c +new file mode 100644 +index 0000000000000000..3ee4c70e40b3fdc6 +--- /dev/null ++++ b/elf/tst-tls23-mod.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-tls23. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLS_CALL (); ++ BEFORE_TLS_CALL (); ++ tls_var0 = *p; ++ struct tls *ret = &tls_var0; ++ AFTER_TLS_CALL (); ++ return ret; ++} +diff --git a/elf/tst-tls23.c b/elf/tst-tls23.c +new file mode 100644 +index 0000000000000000..afe594c067331019 +--- /dev/null ++++ b/elf/tst-tls23.c +@@ -0,0 +1,106 @@ ++/* Test that __tls_get_addr preserves caller-saved registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef IS_SUPPORTED ++# define IS_SUPPORTED() true ++#endif ++ ++/* An architecture can define it to clobber caller-saved registers in ++ malloc below to verify that __tls_get_addr won't change caller-saved ++ registers. */ ++#ifndef PREPARE_MALLOC ++# define PREPARE_MALLOC() ++#endif ++ ++extern void * __libc_malloc (size_t); ++ ++size_t malloc_counter = 0; ++ ++void * ++malloc (size_t n) ++{ ++ PREPARE_MALLOC (); ++ malloc_counter++; ++ return __libc_malloc (n); ++} ++ ++static void *mod; ++static const char *modname = "tst-tls23-mod.so"; ++ ++static void ++open_mod (void) ++{ ++ mod = xdlopen (modname, RTLD_LAZY); ++ printf ("open %s\n", modname); ++} ++ ++static void ++close_mod (void) ++{ ++ xdlclose (mod); ++ mod = NULL; ++ printf ("close %s\n", modname); ++} ++ ++static void ++access_mod (const char *sym) ++{ ++ struct tls var = { -4, -4, -4, -4 }; ++ struct tls *(*f) (struct tls *) = xdlsym (mod, sym); ++ /* Check that our malloc is called. */ ++ malloc_counter = 0; ++ struct tls *p = f (&var); ++ TEST_VERIFY (malloc_counter != 0); ++ printf ("access %s: %s() = %p\n", modname, sym, p); ++ TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); ++ ++(p->a); ++} ++ ++static void * ++start (void *arg) ++{ ++ access_mod ("apply_tls"); ++ return arg; ++} ++ ++static int ++do_test (void) ++{ ++ if (!IS_SUPPORTED ()) ++ return EXIT_UNSUPPORTED; ++ ++ open_mod (); ++ pthread_t t = xpthread_create (NULL, start, NULL); ++ xpthread_join (t); ++ close_mod (); ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/elf/tst-tls23.h +similarity index 52% +rename from sysdeps/x86_64/dl-trampoline-save.h +rename to elf/tst-tls23.h +index 84eac4a8ac13ad86..d0e734569c60a47d 100644 +--- a/sysdeps/x86_64/dl-trampoline-save.h ++++ b/elf/tst-tls23.h +@@ -1,5 +1,5 @@ +-/* x86-64 PLT trampoline register save macros. +- Copyright (C) 2024 Free Software Foundation, Inc. ++/* Test that __tls_get_addr preserves caller-saved registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,19 +16,25 @@ + License along with the GNU C Library; if not, see + . */ + +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: ++#include + +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++struct tls ++{ ++ int64_t a, b, c, d; ++}; + +- __tls_get_addr may be called with 8-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. */ +-# define DL_STACK_ALIGNMENT 8 ++extern struct tls *apply_tls (struct tls *); ++ ++/* An architecture can define them to verify that caller-saved registers ++ aren't changed by __tls_get_addr. */ ++#ifndef INIT_TLS_CALL ++# define INIT_TLS_CALL() ++#endif ++ ++#ifndef BEFORE_TLS_CALL ++# define BEFORE_TLS_CALL() + #endif + +-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align +- stack to 16 bytes before calling _dl_fixup. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || 16 > DL_STACK_ALIGNMENT) ++#ifndef AFTER_TLS_CALL ++# define AFTER_TLS_CALL() ++#endif +diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure +index 19657b627bc84c4e..e1b772c58600c0f2 100644 +--- a/sysdeps/aarch64/preconfigure ++++ b/sysdeps/aarch64/preconfigure +@@ -3,5 +3,6 @@ aarch64*) + base_machine=aarch64 + machine=aarch64 + mtls_descriptor=desc ++ mtls_traditional=trad + ;; + esac +diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile +index a2e8c0b12822be0b..ee6470d78e856315 100644 +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double + endif + + ifeq ($(subdir),elf) +-sysdep-dl-routines += tlsdesc dl-tlsdesc ++sysdep-dl-routines += \ ++ dl-tls-get-addr \ ++# sysdep-dl-routines + + tests += tst-audit3 + modules-names += tst-auditmod3a tst-auditmod3b +diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c +new file mode 100644 +index 0000000000000000..c97e5c57beca8caa +--- /dev/null ++++ b/sysdeps/i386/dl-tls-get-addr.c +@@ -0,0 +1,68 @@ ++/* Ifunc selector for ___tls_get_addr. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifdef SHARED ++# define ___tls_get_addr __redirect____tls_get_addr ++# include ++# undef ___tls_get_addr ++# undef __tls_get_addr ++ ++# define SYMBOL_NAME ___tls_get_addr ++# include ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features* cpu_features = __get_cpu_features (); ++ ++ if (cpu_features->xsave_state_size != 0) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) ++ return OPTIMIZE (xsavec); ++ else ++ return OPTIMIZE (xsave); ++ } ++ else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) ++ return OPTIMIZE (fxsave); ++ return OPTIMIZE (fnsave); ++} ++ ++libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, ++ IFUNC_SELECTOR ()); ++ ++/* The special thing about the x86 TLS ABI is that we have two ++ variants of the __tls_get_addr function with different calling ++ conventions. The GNU version, which we are mostly concerned here, ++ takes the parameter in a register. The name is changed by adding ++ an additional underscore at the beginning. The Sun version uses ++ the normal calling convention. */ ++ ++rtld_hidden_proto (___tls_get_addr) ++rtld_hidden_def (___tls_get_addr) ++ ++void * ++__tls_get_addr (tls_index *ti) ++{ ++ return ___tls_get_addr (ti); ++} ++#endif +diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h +index f17286703d9270ac..2380ec1bd4391de0 100644 +--- a/sysdeps/i386/dl-tls.h ++++ b/sysdeps/i386/dl-tls.h +@@ -29,33 +29,13 @@ typedef struct dl_tls_index + /* This is the prototype for the GNU version. */ + extern void *___tls_get_addr (tls_index *ti) + __attribute__ ((__regparm__ (1))); +-extern void *___tls_get_addr_internal (tls_index *ti) +- __attribute__ ((__regparm__ (1))) attribute_hidden; +- + # if IS_IN (rtld) +-/* The special thing about the x86 TLS ABI is that we have two +- variants of the __tls_get_addr function with different calling +- conventions. The GNU version, which we are mostly concerned here, +- takes the parameter in a register. The name is changed by adding +- an additional underscore at the beginning. The Sun version uses +- the normal calling convention. */ +-void * +-__tls_get_addr (tls_index *ti) +-{ +- return ___tls_get_addr_internal (ti); +-} +- +- + /* Prepare using the definition of __tls_get_addr in the generic + version of this file. */ +-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr +-strong_alias (___tls_get_addr, ___tls_get_addr_internal) +-rtld_hidden_proto (___tls_get_addr) +-rtld_hidden_def (___tls_get_addr) +-#else +- ++# define __tls_get_addr \ ++ __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal ++# else + /* Users should get the better interface. */ +-# define __tls_get_addr ___tls_get_addr +- ++# define __tls_get_addr ___tls_get_addr + # endif + #endif +diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h +index 36270285775016ff..8a5952421e76d52a 100644 +--- a/sysdeps/i386/dl-tlsdesc-dynamic.h ++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h +@@ -16,34 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#undef REGISTER_SAVE_AREA +- +-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +-# error STATE_SAVE_ALIGNMENT must be multiple of 16 +-#endif +- +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +-# ifdef USE_FNSAVE +-# error USE_FNSAVE shouldn't be defined +-# endif +-# ifdef USE_FXSAVE +-/* Use fxsave to save all registers. */ +-# define REGISTER_SAVE_AREA 512 +-# endif +-#else +-# ifdef USE_FNSAVE +-/* Use fnsave to save x87 FPU stack registers. */ +-# define REGISTER_SAVE_AREA 108 +-# else +-# ifndef USE_FXSAVE +-# error USE_FXSAVE must be defined +-# endif +-/* Use fxsave to save all registers. Add 12 bytes to align the stack +- to 16 bytes. */ +-# define REGISTER_SAVE_AREA (512 + 12) +-# endif +-#endif +- + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function +@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: + ret + .p2align 4,,7 + 2: +- cfi_adjust_cfa_offset (32) +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- movl %ebx, -28(%esp) +- movl %esp, %ebx +- cfi_def_cfa_register(%ebx) +- and $-STATE_SAVE_ALIGNMENT, %esp +-#endif +-#ifdef REGISTER_SAVE_AREA +- subl $REGISTER_SAVE_AREA, %esp +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +- cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +-# endif +-#else +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +-# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +-# endif +- /* Allocate stack space of the required size to save the state. */ +- LOAD_PIC_REG (cx) +- subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +-#endif +-#ifdef USE_FNSAVE +- fnsave (%esp) +-#elif defined USE_FXSAVE +- fxsave (%esp) +-#else +- /* Save the argument for ___tls_get_addr in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- /* Clear the XSAVE Header. */ +-# ifdef USE_XSAVE +- movl %edx, (512)(%esp) +- movl %edx, (512 + 4 * 1)(%esp) +- movl %edx, (512 + 4 * 2)(%esp) +- movl %edx, (512 + 4 * 3)(%esp) +-# endif +- movl %edx, (512 + 4 * 4)(%esp) +- movl %edx, (512 + 4 * 5)(%esp) +- movl %edx, (512 + 4 * 6)(%esp) +- movl %edx, (512 + 4 * 7)(%esp) +- movl %edx, (512 + 4 * 8)(%esp) +- movl %edx, (512 + 4 * 9)(%esp) +- movl %edx, (512 + 4 * 10)(%esp) +- movl %edx, (512 + 4 * 11)(%esp) +- movl %edx, (512 + 4 * 12)(%esp) +- movl %edx, (512 + 4 * 13)(%esp) +- movl %edx, (512 + 4 * 14)(%esp) +- movl %edx, (512 + 4 * 15)(%esp) +-# ifdef USE_XSAVE +- xsave (%esp) +-# else +- xsavec (%esp) +-# endif +- /* Restore the argument for ___tls_get_addr in EAX. */ +- movl %ecx, %eax +-#endif +- call HIDDEN_JUMPTARGET (___tls_get_addr) +- /* Get register content back. */ +-#ifdef USE_FNSAVE +- frstor (%esp) +-#elif defined USE_FXSAVE +- fxrstor (%esp) +-#else +- /* Save and retore ___tls_get_addr return value stored in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- xrstor (%esp) +- movl %ecx, %eax +-#endif +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- mov %ebx, %esp +- cfi_def_cfa_register(%esp) +- movl -28(%esp), %ebx +- cfi_restore(%ebx) +-#else +- addl $REGISTER_SAVE_AREA, %esp +- cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +-#endif ++#include "tls-get-addr-wrapper.h" + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S +index f002feee56e43f71..dec7049911d65f7d 100644 +--- a/sysdeps/i386/dl-tlsdesc.S ++++ b/sysdeps/i386/dl-tlsdesc.S +@@ -22,23 +22,6 @@ + #include + #include "tlsdesc.h" + +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: +- +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +- +- __tls_get_addr may be called with 4-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. */ +-# define DL_STACK_ALIGNMENT 4 +-#endif +- +-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align +- stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) +- + .text + + /* This function is used to compute the TP offset for symbols in +diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h +new file mode 100644 +index 0000000000000000..0708e5ad1dc9d8ec +--- /dev/null ++++ b/sysdeps/i386/tls-get-addr-wrapper.h +@@ -0,0 +1,127 @@ ++/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#undef REGISTER_SAVE_AREA ++ ++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multiple of 16 ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++# ifdef USE_FNSAVE ++# error USE_FNSAVE shouldn't be defined ++# endif ++# ifdef USE_FXSAVE ++/* Use fxsave to save all registers. */ ++# define REGISTER_SAVE_AREA 512 ++# endif ++#else ++# ifdef USE_FNSAVE ++/* Use fnsave to save x87 FPU stack registers. */ ++# define REGISTER_SAVE_AREA 108 ++# else ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save all registers. Add 12 bytes to align the stack ++ to 16 bytes. */ ++# define REGISTER_SAVE_AREA (512 + 12) ++# endif ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ movl %ebx, 28(%esp) ++ movl %esp, %ebx ++ cfi_def_cfa_register(%ebx) ++ and $-STATE_SAVE_ALIGNMENT, %esp ++#endif ++#ifdef REGISTER_SAVE_AREA ++ subl $REGISTER_SAVE_AREA, %esp ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++#else ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true ++# endif ++ /* Allocate stack space of the required size to save the state. */ ++ LOAD_PIC_REG (cx) ++ subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ ++ +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp ++#endif ++#ifdef USE_FNSAVE ++ fnsave (%esp) ++#elif defined USE_FXSAVE ++ fxsave (%esp) ++#else ++ /* Save the argument for ___tls_get_addr in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ /* Clear the XSAVE Header. */ ++# ifdef USE_XSAVE ++ movl %edx, (512)(%esp) ++ movl %edx, (512 + 4 * 1)(%esp) ++ movl %edx, (512 + 4 * 2)(%esp) ++ movl %edx, (512 + 4 * 3)(%esp) ++# endif ++ movl %edx, (512 + 4 * 4)(%esp) ++ movl %edx, (512 + 4 * 5)(%esp) ++ movl %edx, (512 + 4 * 6)(%esp) ++ movl %edx, (512 + 4 * 7)(%esp) ++ movl %edx, (512 + 4 * 8)(%esp) ++ movl %edx, (512 + 4 * 9)(%esp) ++ movl %edx, (512 + 4 * 10)(%esp) ++ movl %edx, (512 + 4 * 11)(%esp) ++ movl %edx, (512 + 4 * 12)(%esp) ++ movl %edx, (512 + 4 * 13)(%esp) ++ movl %edx, (512 + 4 * 14)(%esp) ++ movl %edx, (512 + 4 * 15)(%esp) ++# ifdef USE_XSAVE ++ xsave (%esp) ++# else ++ xsavec (%esp) ++# endif ++ /* Restore the argument for ___tls_get_addr in EAX. */ ++ movl %ecx, %eax ++#endif ++ call ___tls_get_addr_internal ++ /* Get register content back. */ ++#ifdef USE_FNSAVE ++ frstor (%esp) ++#elif defined USE_FXSAVE ++ fxrstor (%esp) ++#else ++ /* Save and retore ___tls_get_addr return value stored in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor (%esp) ++ movl %ecx, %eax ++#endif ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ mov %ebx, %esp ++ cfi_def_cfa_register(%esp) ++ movl 28(%esp), %ebx ++ cfi_restore(%ebx) ++#else ++ addl $REGISTER_SAVE_AREA, %esp ++ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) ++#endif ++ ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S +new file mode 100644 +index 0000000000000000..7d143d8a23b06884 +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.S +@@ -0,0 +1,57 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++ .text ++#ifdef SHARED ++# define USE_FNSAVE ++# define MINIMUM_ALIGNMENT 4 ++# define STATE_SAVE_ALIGNMENT 4 ++# define ___tls_get_addr _____tls_get_addr_fnsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef MINIMUM_ALIGNMENT ++# undef USE_FNSAVE ++ ++# define MINIMUM_ALIGNMENT 16 ++ ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define ___tls_get_addr _____tls_get_addr_fxsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_FXSAVE ++ ++# define USE_XSAVE ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVE ++ ++# define USE_XSAVEC ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsavec ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVEC ++#endif /* SHARED */ +diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h +new file mode 100644 +index 0000000000000000..182579872407cefe +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.h +@@ -0,0 +1,42 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++ .hidden ___tls_get_addr ++ .global ___tls_get_addr ++ .type ___tls_get_addr,@function ++ ++ /* This function is a wrapper of ___tls_get_addr_internal to ++ preserve caller-saved vector registers. */ ++ ++ cfi_startproc ++ .align 16 ++___tls_get_addr: ++ /* Like all TLS resolvers, preserve call-clobbered registers. ++ We need two scratch regs anyway. */ ++ subl $32, %esp ++ cfi_adjust_cfa_offset (32) ++ movl %ecx, 20(%esp) ++ movl %edx, 24(%esp) ++#include "tls-get-addr-wrapper.h" ++ movl 20(%esp), %ecx ++ movl 24(%esp), %edx ++ addl $32, %esp ++ cfi_adjust_cfa_offset (-32) ++ ret ++ cfi_endproc ++ .size ___tls_get_addr, .-___tls_get_addr +diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure +index dfc7ecfd9e529710..6b015ae5dc649630 100644 +--- a/sysdeps/loongarch/preconfigure ++++ b/sysdeps/loongarch/preconfigure +@@ -43,6 +43,7 @@ loongarch*) + + + base_machine=loongarch ++ mtls_traditional=trad + ;; + esac + +diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac +index 67e4357013675645..31e9579e63d8255a 100644 +--- a/sysdeps/loongarch/preconfigure.ac ++++ b/sysdeps/loongarch/preconfigure.ac +@@ -41,6 +41,7 @@ loongarch*) + AC_DEFINE_UNQUOTED([LOONGARCH_ABI_FRLEN], [$abi_flen]) + + base_machine=loongarch ++ mtls_traditional=trad + ;; + esac + +diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile +index 5e6cb07ce66decfa..5cdb64f29bac34f3 100644 +--- a/sysdeps/powerpc/Makefile ++++ b/sysdeps/powerpc/Makefile +@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c + $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so + + $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so ++ ++# The test checks if the __tls_get_addr does not clobber caller-saved ++# register, so disable the powerpc specific optimization to force a ++# __tls_get_addr call. ++LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize + endif + + ifneq (no,$(multi-arch)) +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index 01b0192ddf5e23ca..f64cee3cd9a13c3e 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -4,7 +4,13 @@ endif + + ifeq ($(subdir),elf) + sysdep_routines += get-cpuid-feature-leaf +-sysdep-dl-routines += dl-get-cpu-features ++sysdep-dl-routines += \ ++ dl-get-cpu-features \ ++ dl-tlsdesc \ ++ tls_get_addr \ ++ tlsdesc \ ++# sysdep-dl-routines ++ + sysdep_headers += \ + bits/platform/features.h \ + bits/platform/x86.h \ +@@ -113,6 +119,14 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so ++ ++CFLAGS-tst-tls23.c += -msse2 ++CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell ++ ++LDFLAGS-tst-tls23 += -rdynamic ++tst-tls23-mod.so-no-z-defs = yes ++ ++$(objpfx)tst-tls23-mod.so: $(libsupport) + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index 1d6cabd816bf84cc..d5f5ec0eccd242ab 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -183,6 +183,29 @@ + + #define atom_text_section .section ".text.atom", "ax" + ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 8-byte/4-byte stack alignment. ++ Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't ++ assume that stack will be always aligned at 16 bytes. */ ++# ifdef __x86_64__ ++# define DL_STACK_ALIGNMENT 8 ++# define MINIMUM_ALIGNMENT 16 ++# else ++# define DL_STACK_ALIGNMENT 4 ++# endif ++#endif ++ ++/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for ++ STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling ++ _dl_fixup/__tls_get_addr. */ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) ++ + #endif /* __ASSEMBLER__ */ + + #endif /* _X86_SYSDEP_H */ +diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c +new file mode 100644 +index 0000000000000000..6130d91cf88030dc +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.c +@@ -0,0 +1,22 @@ ++#ifndef __x86_64__ ++#include ++ ++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) ++#endif ++ ++/* Set XMM0...XMM7 to all 1s. */ ++#define PREPARE_MALLOC() \ ++{ \ ++ asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ ++ asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ ++ asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ ++ asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ ++ asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ ++ asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ ++ asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ ++ asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ ++} ++ ++#include ++ ++v2di v1, v2, v3; +diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h +new file mode 100644 +index 0000000000000000..21cee4ca0761967c +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.h +@@ -0,0 +1,35 @@ ++/* Test that __tls_get_addr preserves XMM registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++typedef long long v2di __attribute__((vector_size(16))); ++extern v2di v1, v2, v3; ++ ++#define BEFORE_TLS_CALL() \ ++ v1 = __extension__(v2di){0, 0}; \ ++ v2 = __extension__(v2di){0, 0}; ++ ++#define AFTER_TLS_CALL() \ ++ v3 = __extension__(v2di){0, 0}; \ ++ asm volatile ("" : "+x" (v3)); \ ++ union { v2di x; long long a[2]; } u; \ ++ u.x = v3; \ ++ TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); ++ ++#include +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 579bb33ada0e5f16..a738e0178220d9ca 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -41,9 +41,6 @@ ifeq ($(subdir),elf) + CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ + -mno-mmx) + +-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr +- +-tests += ifuncmain8 + modules-names += ifuncmod8 + + $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index 057a10862afd6208..586079291af71811 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -22,7 +22,6 @@ + #include + #include + #include "tlsdesc.h" +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_tlsdesc_dynamic. */ +diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S +index 87c5137837f01a63..4c11fcf032c6544b 100644 +--- a/sysdeps/x86_64/dl-trampoline.S ++++ b/sysdeps/x86_64/dl-trampoline.S +@@ -22,7 +22,6 @@ + #include + #include + #include +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ diff --git a/glibc-upstream-2.39-256.patch b/glibc-upstream-2.39-256.patch new file mode 100644 index 0000000..8226a1c --- /dev/null +++ b/glibc-upstream-2.39-256.patch @@ -0,0 +1,90 @@ +commit 5541edb1bd57414556c8dfe08493ae4b8694e4b4 +Author: H.J. Lu +Date: Mon Aug 18 09:06:48 2025 -0700 + + i386: Also add GLIBC_ABI_GNU2_TLS version [BZ #33129] + + Since the GNU2 TLS run-time bug: + + https://sourceware.org/bugzilla/show_bug.cgi?id=31372 + + affects both i386 and x86-64, also add GLIBC_ABI_GNU2_TLS version to i386 + to indicate the working GNU2 TLS run-time. For x86-64, the additional + GNU2 TLS run-time bug fix is needed for + + https://sourceware.org/bugzilla/show_bug.cgi?id=31501 + + Signed-off-by: H.J. Lu + Reviewed-by: Sam James + (cherry picked from commit bd4628f3f18ac312408782eea450429c6f044860) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index f64cee3cd9a13c3e..c814060e08b3ceeb 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -127,6 +127,15 @@ LDFLAGS-tst-tls23 += -rdynamic + tst-tls23-mod.so-no-z-defs = yes + + $(objpfx)tst-tls23-mod.so: $(libsupport) ++ ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu2-tls.out + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions +index 33dbd67b64c3ab5e..06f414bc148340bd 100644 +--- a/sysdeps/x86/Versions ++++ b/sysdeps/x86/Versions +@@ -8,4 +8,9 @@ libc { + GLIBC_2.33 { + __x86_get_cpuid_feature_leaf; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index a738e0178220d9ca..c97b3ac13af248c0 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -215,15 +215,6 @@ $(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so + | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ + $(evaluate-test) + generated += check-dt-x86-64-plt.out +- +-tests-special += $(objpfx)check-gnu2-tls.out +- +-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_GNU2_TLS > $@; \ +- $(evaluate-test) +-generated += check-gnu2-tls.out + endif + + test-internal-extras += tst-gnu2-tls2mod1 +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index 0a759029e5a00cf1..6a989ad3b373cdf6 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,11 +5,6 @@ libc { + GLIBC_2.13 { + __fentry__; + } +- GLIBC_ABI_GNU2_TLS { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. +- __placeholder_only_for_empty_version_map; +- } + GLIBC_ABI_DT_X86_64_PLT { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. diff --git a/glibc-upstream-2.39-257.patch b/glibc-upstream-2.39-257.patch new file mode 100644 index 0000000..01f5fcd --- /dev/null +++ b/glibc-upstream-2.39-257.patch @@ -0,0 +1,60 @@ +commit 83340b35ccb24dcda8b709c4683ab9eade454bd7 +Author: H.J. Lu +Date: Mon Jul 28 12:16:11 2025 -0700 + + i386: Add GLIBC_ABI_GNU_TLS version [BZ #33221] + + On i386, programs and shared libraries with __thread usage may fail + silently at run-time against glibc without the TLS run-time fix for: + + https://sourceware.org/bugzilla/show_bug.cgi?id=32996 + + Add GLIBC_ABI_GNU_TLS version to indicate that glibc has the working + GNU TLS run-time. Linker can add the GLIBC_ABI_GNU_TLS version to + binaries which depend on the working TLS run-time so that such programs + and shared libraries will fail to load and run at run-time against + libc.so without the GLIBC_ABI_GNU_TLS version, instead of fail silently + at random. + + This fixes BZ #33221. + + Signed-off-by: H.J. Lu + Reviewed-by: Sam James + (cherry picked from commit ed1b7a5a489ab555a27fad9c101ebe2e1c1ba881) + +diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile +index ee6470d78e856315..c0c017b899ebccbe 100644 +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -60,6 +60,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so + @echo "Checking ld.so for SSE register use. This will take a few seconds..." + $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ + $(evaluate-test) ++ ++tests-special += $(objpfx)check-gnu-tls.out ++ ++$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu-tls.out + else + CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) + endif +diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions +index 36e23b466a622f43..9c84c8ef049eb18d 100644 +--- a/sysdeps/i386/Versions ++++ b/sysdeps/i386/Versions +@@ -28,6 +28,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { diff --git a/glibc-upstream-2.39-258.patch b/glibc-upstream-2.39-258.patch new file mode 100644 index 0000000..52a8a0f --- /dev/null +++ b/glibc-upstream-2.39-258.patch @@ -0,0 +1,61 @@ +commit fffc2df8a3e2c8cda2991063d23086360268b777 +Author: Florian Weimer +Date: Fri May 16 19:53:09 2025 +0200 + + Optimize __libc_tsd_* thread variable access + + These variables are not exported, and libc.so TLS is initial-exec + anyway. Declare these variables as hidden and use the initial-exec + TLS model. + + Reviewed-by: Frédéric Bérat + (cherry picked from commit a894f04d877653bea1639fc9a4adf73bd9347bf4) + +diff --git a/include/ctype.h b/include/ctype.h +index ae078a63d355af61..a15e5b66781535d4 100644 +--- a/include/ctype.h ++++ b/include/ctype.h +@@ -29,9 +29,12 @@ libc_hidden_proto (toupper) + # define CTYPE_EXTERN_INLINE extern inline + # endif + +-extern __thread const uint16_t * __libc_tsd_CTYPE_B; +-extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER; +-extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER; ++extern __thread const uint16_t * __libc_tsd_CTYPE_B ++ attribute_hidden attribute_tls_model_ie; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOUPPER ++ attribute_hidden attribute_tls_model_ie; ++extern __thread const int32_t * __libc_tsd_CTYPE_TOLOWER ++ attribute_hidden attribute_tls_model_ie; + + + CTYPE_EXTERN_INLINE const uint16_t ** __attribute__ ((const)) +diff --git a/include/rpc/rpc.h b/include/rpc/rpc.h +index 936ea3cebb8101e1..ba967833ad8d8ac3 100644 +--- a/include/rpc/rpc.h ++++ b/include/rpc/rpc.h +@@ -45,7 +45,8 @@ extern void __rpc_thread_key_cleanup (void) attribute_hidden; + + extern void __rpc_thread_destroy (void) attribute_hidden; + +-extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS; ++extern __thread struct rpc_thread_variables *__libc_tsd_RPC_VARS ++ attribute_hidden attribute_tls_model_ie; + + #define RPC_THREAD_VARIABLE(x) (__rpc_thread_variables()->x) + +diff --git a/locale/localeinfo.h b/locale/localeinfo.h +index bc8e92e4dca80d62..c3249d371537ad7d 100644 +--- a/locale/localeinfo.h ++++ b/locale/localeinfo.h +@@ -237,7 +237,8 @@ extern struct __locale_struct _nl_global_locale attribute_hidden; + /* This fetches the thread-local locale_t pointer, either one set with + uselocale or &_nl_global_locale. */ + #define _NL_CURRENT_LOCALE __libc_tsd_LOCALE +-extern __thread locale_t __libc_tsd_LOCALE; ++extern __thread locale_t __libc_tsd_LOCALE ++ attribute_hidden attribute_tls_model_ie; + + /* For static linking it is desireable to avoid always linking in the code + and data for every category when we can tell at link time that they are diff --git a/glibc.spec b/glibc.spec index a0c5f07..8626baa 100644 --- a/glibc.spec +++ b/glibc.spec @@ -147,8 +147,8 @@ Version: %{glibcversion} # - It allows using the Release number without the %%dist tag in the dependency # generator to make the generated requires interchangeable between Rawhide # and ELN (.elnYY < .fcXX). -%global baserelease 56 -Release: %{baserelease}%{?dist}.alma.2 +%global baserelease 58 +Release: %{baserelease}%{?dist}.alma.1 # Licenses: # @@ -699,6 +699,53 @@ Patch373: glibc-RHEL-108823-11.patch Patch374: glibc-RHEL-108823-12.patch Patch375: glibc-RHEL-108823-13.patch Patch376: glibc-RHEL-108823-14.patch +# glibc-2.39-212-gb027d5b145 is glibc-RHEL-105324.patch. +Patch377: glibc-upstream-2.39-213.patch +Patch378: glibc-upstream-2.39-214.patch +Patch379: glibc-upstream-2.39-215.patch +Patch380: glibc-upstream-2.39-216.patch +Patch381: glibc-upstream-2.39-217.patch +Patch382: glibc-upstream-2.39-218.patch +Patch383: glibc-upstream-2.39-219.patch +Patch384: glibc-upstream-2.39-220.patch +Patch385: glibc-upstream-2.39-221.patch +Patch386: glibc-upstream-2.39-222.patch +Patch387: glibc-upstream-2.39-223.patch +Patch388: glibc-upstream-2.39-224.patch +Patch389: glibc-upstream-2.39-225.patch +# glibc-2.39-226-g42a8cb7560 is glibc-RHEL-108475-1.patch. +# glibc-2.39-227-gf0e8d04eef is glibc-RHEL-108475-2.patch. +Patch390: glibc-upstream-2.39-228.patch +Patch391: glibc-upstream-2.39-229.patch +Patch392: glibc-upstream-2.39-230.patch +Patch393: glibc-upstream-2.39-231.patch +Patch394: glibc-upstream-2.39-232.patch +Patch395: glibc-upstream-2.39-233.patch +Patch396: glibc-upstream-2.39-234.patch +Patch397: glibc-upstream-2.39-235.patch +Patch398: glibc-upstream-2.39-236.patch +Patch399: glibc-upstream-2.39-237.patch +Patch400: glibc-upstream-2.39-238.patch +Patch401: glibc-upstream-2.39-239.patch +Patch402: glibc-upstream-2.39-240.patch +Patch403: glibc-upstream-2.39-241.patch +Patch404: glibc-upstream-2.39-242.patch +Patch405: glibc-upstream-2.39-243.patch +Patch406: glibc-upstream-2.39-244.patch +Patch407: glibc-upstream-2.39-245.patch +Patch408: glibc-upstream-2.39-246.patch +Patch409: glibc-upstream-2.39-247.patch +Patch410: glibc-upstream-2.39-248.patch +Patch411: glibc-upstream-2.39-249.patch +Patch412: glibc-upstream-2.39-250.patch +Patch413: glibc-upstream-2.39-251.patch +Patch414: glibc-upstream-2.39-252.patch +Patch415: glibc-upstream-2.39-253.patch +# glibc-2.39-254-g3b6c8ea878 is glibc-RHEL-106562-16.patch. +# glibc-2.39-255-g1f17635507 is glibc-RHEL-106562-17.patch. +Patch416: glibc-upstream-2.39-256.patch +Patch417: glibc-upstream-2.39-257.patch +Patch418: glibc-upstream-2.39-258.patch ############################################################################## # Continued list of core "glibc" package information: @@ -716,6 +763,9 @@ Provides: rtld(GNU_HASH) # We need libgcc for cancellation support in POSIX threads. Requires: libgcc%{_isa} +# Encourage the package manager to break the libgcc/glibc dependency +# cycle by installing libgcc first. (This is the historic installation order.) +Requires(pre): libgcc%{_isa} Requires: glibc-common = %{version}-%{release} @@ -2715,9 +2765,31 @@ update_gconv_modules_cache () %endif %changelog -* Wed Aug 20 2025 Eduard Abdullin - 2.39-56.alma.2 +* Fri Aug 22 2025 Eduard Abdullin - 2.39-58.alma.1 - Overwrite target for x86_64_v2 +* Thu Aug 21 2025 Florian Weimer - 2.39-58 +- Use Requires(pre): libgcc%{_isa} to break libgcc cycle (RHEL-110559) + +* Thu Aug 21 2025 Arjun Shankar - 2.39-57 +- Sync with upstream branch release/2.39/master (RHEL-109536) +- Upstream commit: fffc2df8a3e2c8cda2991063d23086360268b777 +- Extend struct r_debug to support multiple namespaces (RHEL-101985) +- Fix a potential crash in the dynamic loader when processing specific + symbol versions (RHEL-109683) +- Signal la_objopen for ld.so with dlmopen (RHEL-109693) +- Switch to main malloc after final ld.so self-relocation (RHEL-109703) +- Prevent ld.so from asserting and crashing during audited library loads + (RHEL-109702) +- x86-64: Provide GLIBC_ABI_DT_X86_64_PLT symbol version (RHEL-109621) +- x86-64: Provide GLIBC_ABI_GNU2_TLS symbol version (RHEL-109625) +- Ensure fallback initialization of ctype TLS data pointers to fix segfaults in + programs using dlmopen or auditors (RHEL-72018) +- Handle load segment gaps in _dl_find_object (RHEL-104854) +- AArch64: Improve codegen in SVE log1p +- AArch64: Optimize inverse trig functions +- AArch64: Avoid memset ifunc in cpu-features.c [BZ #33112] + * Tue Aug 19 2025 Arjun Shankar - 2.39-56 - Add FUSE based tests for fchmod, lstat, and mkstemp (RHEL-108823)