forked from rpms/glibc
f94f56a23b
Upstream commit: 5d7f1bce7d8eea31f4baeb68bcc3124b35acc751 - Apply glibc-benchtests-aarch64.patch to fix an aarch64 build failure. - Drop glibc-rh2244688.patch revert. Fix applied upstream. - Drop glibc-rh2244992.patch, glibc-rh2248915.patch, glibc-rh2248502-3.patch. All applied upstream. - posix: Revert the removal of the crypt prototype from <unistd.h> - elf: Add comments on how LD_AUDIT and LD_PRELOAD handle __libc_enable_secure - elf: Ignore LD_LIBRARY_PATH and debug env var for setuid for static - elf: Remove any_debug from dl_main_state - elf: Remove LD_PROFILE for static binaries - elf: Ignore LD_PROFILE for setuid binaries - s390: Use dl-symbol-redir-ifunc.h on cpu-tunables - x86: Use dl-symbol-redir-ifunc.h on cpu-tunables - elf: Emit warning if tunable is ill-formatted - elf: Fix _dl_debug_vdprintf to work before self-relocation - elf: Do not parse ill-formatted strings - elf: Do not process invalid tunable format - elf: Add all malloc tunable to unsecvars - elf: Ignore GLIBC_TUNABLES for setuid/setgid binaries - elf: Add GLIBC_TUNABLES to unsecvars - elf: Remove /etc/suid-debug support - stdlib: The qsort implementation needs to use heapsort in more cases - stdlib: Handle various corner cases in the fallback heapsort for qsort - stdlib: Avoid another self-comparison in qsort - hurd: fix restarting reauth_dtable on signal - hurd: Prevent the final file_exec_paths call from signals - manual: Fix termios.c example. (Bug 31078) - aarch64: Add vector implementations of expm1 routines - linux: Use fchmodat2 on fchmod for flags different than 0 (BZ 26401) - intl: Add test case for bug 16621 - resolv: free only initialized items from gai pool - ldconfig: Fixes for skipping temporary files. 
- nptl: Link tst-execstack-threads-mod.so with -z execstack - nptl: Rename tst-execstack to tst-execstack-threads - localedata: Convert oc_FR locale to UTF-8 - localedata: Add information for Occitan - elf: Fix force_first handling in dlclose (bug 30981) - elf: Handle non-directory name in search path (BZ 31035) - New Zealand locales (en_NZ & mi_NZ) first day of week should be Monday - x86: Fix unchecked AVX512-VBMI2 usage in strrchr-evex-base.S - posix: Check pidfd_spawn with tst-spawn7-pid - y2038: Fix support for 64-bit time on legacy ABIs - AArch64: Remove Falkor memcpy - AArch64: Add memset_zva64 - AArch64: Cleanup emag memset - test: Run the tst-tls-allocation-failure-static-patched with test-wrapper. - aarch64: Add vector implementations of log1p routines - aarch64: Add vector implementations of atan2 routines - aarch64: Add vector implementations of atan routines - aarch64: Add vector implementations of acos routines - aarch64: Add vector implementations of asin routines
251 lines
9.0 KiB
Diff
251 lines
9.0 KiB
Diff
Author: Joe Ramsay <Joe.Ramsay@arm.com>
|
|
Date: Tue Nov 21 14:39:39 2023 +0000
|
|
|
|
aarch64: Fix libmvec benchmarks
|
|
|
|
These were broken by the new atan2 functions, as they were only
|
|
set up for univariate functions. Arity is now detected from the
|
|
input file - this revealed a mistake whereby the double-precision
|
|
inputs were being used for both single- and double-precision
|
|
routines, which is now remedied.
|
|
|
|
diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
|
|
index 3e124c781065fea9..3661a24044cc9770 100644
|
|
--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
|
|
+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
|
|
@@ -22,40 +22,49 @@ TEMPLATE = """
|
|
#include <math.h>
|
|
#include <arm_neon.h>
|
|
|
|
-#define STRIDE {stride}
|
|
+#define STRIDE {rowlen}
|
|
|
|
-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\
|
|
- {rtype} mx0 = {fname}(vld1q_f{prec_short} (variants[v].in[i].arg0)); \\
|
|
+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\
|
|
+ {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE])); \\
|
|
mx0; }}))
|
|
|
|
-struct args
|
|
+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\
|
|
+ {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE]), \\
|
|
+ vld1q_f{prec_short} (&variants[v].in->arg1[i * STRIDE])); \\
|
|
+ mx0; }}))
|
|
+
|
|
+struct args_1
|
|
+{{
|
|
+ {stype} arg0[{nelems}];
|
|
+}};
|
|
+
|
|
+struct args_2
|
|
{{
|
|
- {stype} arg0[STRIDE];
|
|
- double timing;
|
|
+ {stype} arg0[{nelems}];
|
|
+ {stype} arg1[{nelems}];
|
|
}};
|
|
|
|
struct _variants
|
|
{{
|
|
const char *name;
|
|
- int count;
|
|
- const struct args *in;
|
|
+ const struct args_{arity} *in;
|
|
}};
|
|
|
|
-static const struct args in0[{rowcount}] = {{
|
|
+static const struct args_{arity} in0 = {{
|
|
{in_data}
|
|
}};
|
|
|
|
static const struct _variants variants[1] = {{
|
|
- {{"", {rowcount}, in0}},
|
|
+ {{"", &in0}},
|
|
}};
|
|
|
|
#define NUM_VARIANTS 1
|
|
-#define NUM_SAMPLES(i) (variants[i].count)
|
|
+#define NUM_SAMPLES(i) ({nelems} / STRIDE)
|
|
#define VARIANT(i) (variants[i].name)
|
|
|
|
static {rtype} volatile ret;
|
|
|
|
-#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC(i, j); }})
|
|
+#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC_{arity}(i, j); }})
|
|
#define FUNCNAME "{fname}"
|
|
#include <bench-libmvec-skeleton.c>
|
|
"""
|
|
@@ -63,27 +72,34 @@ static {rtype} volatile ret;
|
|
def main(name):
|
|
_, prec, _, func = name.split("-")
|
|
scalar_to_advsimd_type = {"double": "float64x2_t", "float": "float32x4_t"}
|
|
-
|
|
- stride = {"double": 2, "float": 4}[prec]
|
|
+ rowlen = {"double": 2, "float": 4}[prec]
|
|
rtype = scalar_to_advsimd_type[prec]
|
|
atype = scalar_to_advsimd_type[prec]
|
|
- fname = f"_ZGVnN{stride}v_{func}{'f' if prec == 'float' else ''}"
|
|
prec_short = {"double": 64, "float": 32}[prec]
|
|
-
|
|
- with open(f"../benchtests/libmvec/{func}-inputs") as f:
|
|
- in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
|
|
- in_vals = [in_vals[i:i+stride] for i in range(0, len(in_vals), stride)]
|
|
- rowcount= len(in_vals)
|
|
- in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
|
|
-
|
|
- print(TEMPLATE.format(stride=stride,
|
|
+ input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]
|
|
+
|
|
+ with open(f"../benchtests/libmvec/{input_filename}") as f:
|
|
+ input_file = f.readlines()
|
|
+ in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))
|
|
+ # Split in case of multivariate signature
|
|
+ in_vals = (l.split(", ") for l in in_vals)
|
|
+ # Transpose
|
|
+ in_vals = list(zip(*in_vals))
|
|
+ in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")
|
|
+ for col in in_vals)
|
|
+
|
|
+ arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)
|
|
+ fname = f"_ZGVnN{rowlen}{'v' * arity}_{func}{'f' if prec == 'float' else ''}"
|
|
+
|
|
+ print(TEMPLATE.format(rowlen=rowlen,
|
|
rtype=rtype,
|
|
atype=atype,
|
|
fname=fname,
|
|
prec_short=prec_short,
|
|
in_data=in_data,
|
|
- rowcount=rowcount,
|
|
- stype=prec))
|
|
+ stype=prec,
|
|
+ arity=arity,
|
|
+ nelems=len(in_vals[0])))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
|
|
index 66f2c8e0f465f9ce..5d9332be9c5a536a 100755
|
|
--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
|
|
+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
|
|
@@ -22,46 +22,55 @@ TEMPLATE = """
|
|
#include <math.h>
|
|
#include <arm_sve.h>
|
|
|
|
-#define MAX_STRIDE {max_stride}
|
|
#define STRIDE {stride}
|
|
#define PTRUE svptrue_b{prec_short}
|
|
#define SV_LOAD svld1_f{prec_short}
|
|
#define SV_STORE svst1_f{prec_short}
|
|
#define REQUIRE_SVE
|
|
|
|
-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\
|
|
- {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\
|
|
+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\
|
|
+ {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), PTRUE()); \\
|
|
mx0; }}))
|
|
|
|
-struct args
|
|
+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\
|
|
+ {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), \\
|
|
+ SV_LOAD (PTRUE(), &variants[v].in->arg1[i * STRIDE]), \\
|
|
+ PTRUE()); \\
|
|
+ mx0; }}))
|
|
+
|
|
+struct args_1
|
|
{{
|
|
- {stype} arg0[MAX_STRIDE];
|
|
- double timing;
|
|
+ {stype} arg0[{nelems}];
|
|
+}};
|
|
+
|
|
+struct args_2
|
|
+{{
|
|
+ {stype} arg0[{nelems}];
|
|
+ {stype} arg1[{nelems}];
|
|
}};
|
|
|
|
struct _variants
|
|
{{
|
|
const char *name;
|
|
- int count;
|
|
- const struct args *in;
|
|
+ const struct args_{arity} *in;
|
|
}};
|
|
|
|
-static const struct args in0[{rowcount}] = {{
|
|
+static const struct args_{arity} in0 = {{
|
|
{in_data}
|
|
}};
|
|
|
|
static const struct _variants variants[1] = {{
|
|
- {{"", {rowcount}, in0}},
|
|
+ {{"", &in0}},
|
|
}};
|
|
|
|
#define NUM_VARIANTS 1
|
|
-#define NUM_SAMPLES(i) (variants[i].count)
|
|
+#define NUM_SAMPLES(i) ({nelems} / STRIDE)
|
|
#define VARIANT(i) (variants[i].name)
|
|
|
|
// Cannot pass volatile pointer to svst1. This still does not appear to get optimised out.
|
|
-static {stype} /*volatile*/ ret[MAX_STRIDE];
|
|
+static {stype} /*volatile*/ ret[{rowlen}];
|
|
|
|
-#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }})
|
|
+#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC_{arity}(i, j)); }})
|
|
#define FUNCNAME "{fname}"
|
|
#include <bench-libmvec-skeleton.c>
|
|
"""
|
|
@@ -69,23 +78,29 @@ static {stype} /*volatile*/ ret[MAX_STRIDE];
|
|
def main(name):
|
|
_, prec, _, func = name.split("-")
|
|
scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"}
|
|
-
|
|
stride = {"double": "svcntd()", "float": "svcntw()"}[prec]
|
|
rtype = scalar_to_sve_type[prec]
|
|
atype = scalar_to_sve_type[prec]
|
|
- fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}"
|
|
prec_short = {"double": 64, "float": 32}[prec]
|
|
# Max SVE vector length is 2048 bits. To ensure benchmarks are
|
|
# vector-length-agnostic, but still use as wide vectors as
|
|
# possible on any given target, divide input data into 2048-bit
|
|
# rows, then load/store as many elements as the target will allow.
|
|
- max_stride = 2048 // prec_short
|
|
-
|
|
- with open(f"../benchtests/libmvec/{func}-inputs") as f:
|
|
- in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
|
|
- in_vals = [in_vals[i:i+max_stride] for i in range(0, len(in_vals), max_stride)]
|
|
- rowcount= len(in_vals)
|
|
- in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
|
|
+ rowlen = {"double": 32, "float": 64}[prec]
|
|
+ input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]
|
|
+
|
|
+ with open(f"../benchtests/libmvec/{input_filename}") as f:
|
|
+ input_file = f.readlines()
|
|
+ in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))
|
|
+ # Split in case of multivariate signature
|
|
+ in_vals = (l.split(", ") for l in in_vals)
|
|
+ # Transpose
|
|
+ in_vals = list(zip(*in_vals))
|
|
+ in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")
|
|
+ for col in in_vals)
|
|
+
|
|
+ arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)
|
|
+ fname = f"_ZGVsMx{'v' * arity}_{func}{'f' if prec == 'float' else ''}"
|
|
|
|
print(TEMPLATE.format(stride=stride,
|
|
rtype=rtype,
|
|
@@ -93,9 +108,10 @@ def main(name):
|
|
fname=fname,
|
|
prec_short=prec_short,
|
|
in_data=in_data,
|
|
- rowcount=rowcount,
|
|
stype=prec,
|
|
- max_stride=max_stride))
|
|
+ rowlen=rowlen,
|
|
+ arity=arity,
|
|
+ nelems=len(in_vals[0])))
|
|
|
|
|
|
if __name__ == "__main__":
|