glibc/glibc-benchtests-aarch64.patch

Author: Joe Ramsay <Joe.Ramsay@arm.com>
Date:   Tue Nov 21 14:39:39 2023 +0000

    aarch64: Fix libmvec benchmarks
    
    These were broken by the new atan2 functions, as they were only
    set up for univariate functions. Arity is now detected from the
    input file - this revealed a mistake that the double-precision
    inputs were being used for both single- and double-precision
    routines, which is now remedied.

diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
index 3e124c781065fea9..3661a24044cc9770 100644
--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py
@@ -22,40 +22,49 @@ TEMPLATE = """
 #include <math.h>
 #include <arm_neon.h>
 
-#define STRIDE {stride}
+#define STRIDE {rowlen}
 
-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{                         \\
-   {rtype} mx0 = {fname}(vld1q_f{prec_short} (variants[v].in[i].arg0));  \\
+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{                                 \\
+   {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE])); \\
    mx0; }}))
 
-struct args
+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{                                 \\
+   {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE]),  \\
+                         vld1q_f{prec_short} (&variants[v].in->arg1[i * STRIDE])); \\
+   mx0; }}))
+
+struct args_1
+{{
+  {stype} arg0[{nelems}];
+}};
+
+struct args_2
 {{
-  {stype} arg0[STRIDE];
-  double timing;
+  {stype} arg0[{nelems}];
+  {stype} arg1[{nelems}];
 }};
 
 struct _variants
 {{
   const char *name;
-  int count;
-  const struct args *in;
+  const struct args_{arity} *in;
 }};
 
-static const struct args in0[{rowcount}] = {{
+static const struct args_{arity} in0 = {{
 {in_data}
 }};
 
 static const struct _variants variants[1] = {{
-  {{"", {rowcount}, in0}},
+  {{"", &in0}},
 }};
 
 #define NUM_VARIANTS 1
-#define NUM_SAMPLES(i) (variants[i].count)
+#define NUM_SAMPLES(i) ({nelems} / STRIDE)
 #define VARIANT(i) (variants[i].name)
 
 static {rtype} volatile ret;
 
-#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC(i, j); }})
+#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC_{arity}(i, j); }})
 #define FUNCNAME "{fname}"
 #include <bench-libmvec-skeleton.c>
 """
@@ -63,27 +72,34 @@ static {rtype} volatile ret;
 def main(name):
     _, prec, _, func = name.split("-")
     scalar_to_advsimd_type = {"double": "float64x2_t", "float": "float32x4_t"}
-
-    stride = {"double": 2, "float": 4}[prec]
+    rowlen = {"double": 2, "float": 4}[prec]
     rtype = scalar_to_advsimd_type[prec]
     atype = scalar_to_advsimd_type[prec]
-    fname = f"_ZGVnN{stride}v_{func}{'f' if prec == 'float' else ''}"
     prec_short = {"double": 64, "float": 32}[prec]
-
-    with open(f"../benchtests/libmvec/{func}-inputs") as f:
-        in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
-    in_vals = [in_vals[i:i+stride] for i in range(0, len(in_vals), stride)]
-    rowcount= len(in_vals)
-    in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
-
-    print(TEMPLATE.format(stride=stride,
+    input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]
+
+    with open(f"../benchtests/libmvec/{input_filename}") as f:
+        input_file = f.readlines()
+    in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))
+    # Split in case of multivariate signature
+    in_vals = (l.split(", ") for l in in_vals)
+    # Transpose
+    in_vals = list(zip(*in_vals))
+    in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")
+                         for col in in_vals)
+
+    arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)
+    fname = f"_ZGVnN{rowlen}{'v' * arity}_{func}{'f' if prec == 'float' else ''}"
+
+    print(TEMPLATE.format(rowlen=rowlen,
                           rtype=rtype,
                           atype=atype,
                           fname=fname,
                           prec_short=prec_short,
                           in_data=in_data,
-                          rowcount=rowcount,
-                          stype=prec))
+                          stype=prec,
+                          arity=arity,
+                          nelems=len(in_vals[0])))
 
 
 if __name__ == "__main__":
diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
index 66f2c8e0f465f9ce..5d9332be9c5a536a 100755
--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py
@@ -22,46 +22,55 @@ TEMPLATE = """
 #include <math.h>
 #include <arm_sve.h>
 
-#define MAX_STRIDE {max_stride}
 #define STRIDE {stride}
 #define PTRUE svptrue_b{prec_short}
 #define SV_LOAD svld1_f{prec_short}
 #define SV_STORE svst1_f{prec_short}
 #define REQUIRE_SVE
 
-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{                              \\
-   {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\
+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{                                       \\
+   {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), PTRUE()); \\
    mx0; }}))
 
-struct args
+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{                              \\
+   {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]),  \\
+                         SV_LOAD (PTRUE(), &variants[v].in->arg1[i * STRIDE]),  \\
+                         PTRUE());                                              \\
+   mx0; }}))
+
+struct args_1
 {{
-  {stype} arg0[MAX_STRIDE];
-  double timing;
+  {stype} arg0[{nelems}];
+}};
+
+struct args_2
+{{
+  {stype} arg0[{nelems}];
+  {stype} arg1[{nelems}];
 }};
 
 struct _variants
 {{
   const char *name;
-  int count;
-  const struct args *in;
+  const struct args_{arity} *in;
 }};
 
-static const struct args in0[{rowcount}] = {{
+static const struct args_{arity} in0 = {{
 {in_data}
 }};
 
 static const struct _variants variants[1] = {{
-  {{"", {rowcount}, in0}},
+  {{"", &in0}},
 }};
 
 #define NUM_VARIANTS 1
-#define NUM_SAMPLES(i) (variants[i].count)
+#define NUM_SAMPLES(i) ({nelems} / STRIDE)
 #define VARIANT(i) (variants[i].name)
 
 // Cannot pass volatile pointer to svst1. This still does not appear to get optimised out.
-static {stype} /*volatile*/ ret[MAX_STRIDE];
+static {stype} /*volatile*/ ret[{rowlen}];
 
-#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }})
+#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC_{arity}(i, j)); }})
 #define FUNCNAME "{fname}"
 #include <bench-libmvec-skeleton.c>
 """
@@ -69,23 +78,29 @@ static {stype} /*volatile*/ ret[MAX_STRIDE];
 def main(name):
     _, prec, _, func = name.split("-")
     scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"}
-
     stride = {"double": "svcntd()", "float": "svcntw()"}[prec]
     rtype = scalar_to_sve_type[prec]
     atype = scalar_to_sve_type[prec]
-    fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}"
     prec_short = {"double": 64, "float": 32}[prec]
     # Max SVE vector length is 2048 bits. To ensure benchmarks are
     # vector-length-agnostic, but still use as wide vectors as
     # possible on any given target, divide input data into 2048-bit
     # rows, then load/store as many elements as the target will allow.
-    max_stride = 2048 // prec_short
-
-    with open(f"../benchtests/libmvec/{func}-inputs") as f:
-        in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
-    in_vals = [in_vals[i:i+max_stride] for i in range(0, len(in_vals), max_stride)]
-    rowcount= len(in_vals)
-    in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)
+    rowlen = {"double": 32, "float": 64}[prec]
+    input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]
+
+    with open(f"../benchtests/libmvec/{input_filename}") as f:
+        input_file = f.readlines()
+    in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))
+    # Split in case of multivariate signature
+    in_vals = (l.split(", ") for l in in_vals)
+    # Transpose
+    in_vals = list(zip(*in_vals))
+    in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")
+                         for col in in_vals)
+
+    arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)
+    fname = f"_ZGVsMx{'v' * arity}_{func}{'f' if prec == 'float' else ''}"
 
     print(TEMPLATE.format(stride=stride,
                           rtype=rtype,
@@ -93,9 +108,10 @@ def main(name):
                           fname=fname,
                           prec_short=prec_short,
                           in_data=in_data,
-                          rowcount=rowcount,
                           stype=prec,
-                          max_stride=max_stride))
+                          rowlen=rowlen,
+                          arity=arity,
+                          nelems=len(in_vals[0])))
 
 
 if __name__ == "__main__":
Auto-sync with upstream branch master

Upstream commit: 5d7f1bce7d8eea31f4baeb68bcc3124b35acc751

- Apply glibc-benchtests-aarch64.patch to fix an aarch64 build failure.
- Drop glibc-rh2244688.patch revert. Fix applied upstream.
- Drop glibc-rh2244992.patch, glibc-rh2248915.patch, glibc-rh2248502-3.patch. All applied upstream.
- posix: Revert the removal of the crypt prototype from <unistd.h>
- elf: Add comments on how LD_AUDIT and LD_PRELOAD handle __libc_enable_secure
- elf: Ignore LD_LIBRARY_PATH and debug env var for setuid for static
- elf: Remove any_debug from dl_main_state
- elf: Remove LD_PROFILE for static binaries
- elf: Ignore LD_PROFILE for setuid binaries
- s390: Use dl-symbol-redir-ifunc.h on cpu-tunables
- x86: Use dl-symbol-redir-ifunc.h on cpu-tunables
- elf: Emit warning if tunable is ill-formatted
- elf: Fix _dl_debug_vdprintf to work before self-relocation
- elf: Do not parse ill-formatted strings
- elf: Do not process invalid tunable format
- elf: Add all malloc tunable to unsecvars
- elf: Ignore GLIBC_TUNABLES for setuid/setgid binaries
- elf: Add GLIBC_TUNABLES to unsecvars
- elf: Remove /etc/suid-debug support
- stdlib: The qsort implementation needs to use heapsort in more cases
- stdlib: Handle various corner cases in the fallback heapsort for qsort
- stdlib: Avoid another self-comparison in qsort
- hurd: fix restarting reauth_dtable on signal
- hurd: Prevent the final file_exec_paths call from signals
- manual: Fix termios.c example. (Bug 31078)
- aarch64: Add vector implementations of expm1 routines
- linux: Use fchmodat2 on fchmod for flags different than 0 (BZ 26401)
- intl: Add test case for bug 16621
- resolv: free only initialized items from gai pool
- ldconfig: Fixes for skipping temporary files.
- nptl: Link tst-execstack-threads-mod.so with -z execstack
- nptl: Rename tst-execstack to tst-execstack-threads
- localedata: Convert oc_FR locale to UTF-8
- localedata: Add information for Occitan
- elf: Fix force_first handling in dlclose (bug 30981)
- elf: Handle non-directory name in search path (BZ 31035)
- New Zealand locales (en_NZ & mi_NZ) first day of week should be Monday
- x86: Fix unchecked AVX512-VBMI2 usage in strrchr-evex-base.S
- posix: Check pidfd_spawn with tst-spawn7-pid
- y2038: Fix support for 64-bit time on legacy ABIs
- AArch64: Remove Falkor memcpy
- AArch64: Add memset_zva64
- AArch64: Cleanup emag memset
- test: Run the tst-tls-allocation-failure-static-patched with test-wrapper.
- aarch64: Add vector implementations of log1p routines
- aarch64: Add vector implementations of atan2 routines
- aarch64: Add vector implementations of atan routines
- aarch64: Add vector implementations of acos routines
- aarch64: Add vector implementations of asin routines

2023-11-22 07:49:54 +00:00			`Author: Joe Ramsay <Joe.Ramsay@arm.com>`
			`Date: Tue Nov 21 14:39:39 2023 +0000`

			`aarch64: Fix libmvec benchmarks`

			`These were broken by the new atan2 functions, as they were only`
			`set up for univariate functions. Arity is now detected from the`
			`input file - this revealed a mistake that the double-precision`
			`inputs were being used for both single- and double-precision`
			`routines, which is now remedied.`

			`diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py`
			`index 3e124c781065fea9..3661a24044cc9770 100644`
			`--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py`
			`+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_advsimd.py`
			`@@ -22,40 +22,49 @@ TEMPLATE = """`
			`#include <math.h>`
			`#include <arm_neon.h>`

			`-#define STRIDE {stride}`
			`+#define STRIDE {rowlen}`

			`-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\`
			`- {rtype} mx0 = {fname}(vld1q_f{prec_short} (variants[v].in[i].arg0)); \\`
			`+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\`
			`+ {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE])); \\`
			`mx0; }}))`

			`-struct args`
			`+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\`
			`+ {rtype} mx0 = {fname}(vld1q_f{prec_short} (&variants[v].in->arg0[i * STRIDE]), \\`
			`+ vld1q_f{prec_short} (&variants[v].in->arg1[i * STRIDE])); \\`
			`+ mx0; }}))`
			`+`
			`+struct args_1`
			`+{{`
			`+ {stype} arg0[{nelems}];`
			`+}};`
			`+`
			`+struct args_2`
			`{{`
			`- {stype} arg0[STRIDE];`
			`- double timing;`
			`+ {stype} arg0[{nelems}];`
			`+ {stype} arg1[{nelems}];`
			`}};`

			`struct _variants`
			`{{`
			`const char *name;`
			`- int count;`
			`- const struct args *in;`
			`+ const struct args_{arity} *in;`
			`}};`

			`-static const struct args in0[{rowcount}] = {{`
			`+static const struct args_{arity} in0 = {{`
			`{in_data}`
			`}};`

			`static const struct _variants variants[1] = {{`
			`- {{"", {rowcount}, in0}},`
			`+ {{"", &in0}},`
			`}};`

			`#define NUM_VARIANTS 1`
			`-#define NUM_SAMPLES(i) (variants[i].count)`
			`+#define NUM_SAMPLES(i) ({nelems} / STRIDE)`
			`#define VARIANT(i) (variants[i].name)`

			`static {rtype} volatile ret;`

			`-#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC(i, j); }})`
			`+#define BENCH_FUNC(i, j) ({{ ret = CALL_BENCH_FUNC_{arity}(i, j); }})`
			`#define FUNCNAME "{fname}"`
			`#include <bench-libmvec-skeleton.c>`
			`"""`
			`@@ -63,27 +72,34 @@ static {rtype} volatile ret;`
			`def main(name):`
			`_, prec, _, func = name.split("-")`
			`scalar_to_advsimd_type = {"double": "float64x2_t", "float": "float32x4_t"}`
			`-`
			`- stride = {"double": 2, "float": 4}[prec]`
			`+ rowlen = {"double": 2, "float": 4}[prec]`
			`rtype = scalar_to_advsimd_type[prec]`
			`atype = scalar_to_advsimd_type[prec]`
			`- fname = f"_ZGVnN{stride}v_{func}{'f' if prec == 'float' else ''}"`
			`prec_short = {"double": 64, "float": 32}[prec]`
			`-`
			`- with open(f"../benchtests/libmvec/{func}-inputs") as f:`
			`- in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]`
			`- in_vals = [in_vals[i:i+stride] for i in range(0, len(in_vals), stride)]`
			`- rowcount= len(in_vals)`
			`- in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)`
			`-`
			`- print(TEMPLATE.format(stride=stride,`
			`+ input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]`
			`+`
			`+ with open(f"../benchtests/libmvec/{input_filename}") as f:`
			`+ input_file = f.readlines()`
			`+ in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))`
			`+ # Split in case of multivariate signature`
			`+ in_vals = (l.split(", ") for l in in_vals)`
			`+ # Transpose`
			`+ in_vals = list(zip(*in_vals))`
			`+ in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")`
			`+ for col in in_vals)`
			`+`
			`+ arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)`
			`+ fname = f"_ZGVnN{rowlen}{'v' * arity}_{func}{'f' if prec == 'float' else ''}"`
			`+`
			`+ print(TEMPLATE.format(rowlen=rowlen,`
			`rtype=rtype,`
			`atype=atype,`
			`fname=fname,`
			`prec_short=prec_short,`
			`in_data=in_data,`
			`- rowcount=rowcount,`
			`- stype=prec))`
			`+ stype=prec,`
			`+ arity=arity,`
			`+ nelems=len(in_vals[0])))`


			`if __name__ == "__main__":`
			`diff --git a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py`
			`index 66f2c8e0f465f9ce..5d9332be9c5a536a 100755`
			`--- a/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py`
			`+++ b/sysdeps/aarch64/fpu/scripts/bench_libmvec_sve.py`
			`@@ -22,46 +22,55 @@ TEMPLATE = """`
			`#include <math.h>`
			`#include <arm_sve.h>`

			`-#define MAX_STRIDE {max_stride}`
			`#define STRIDE {stride}`
			`#define PTRUE svptrue_b{prec_short}`
			`#define SV_LOAD svld1_f{prec_short}`
			`#define SV_STORE svst1_f{prec_short}`
			`#define REQUIRE_SVE`

			`-#define CALL_BENCH_FUNC(v, i) (__extension__ ({{ \\`
			`- {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), variants[v].in[i].arg0), PTRUE()); \\`
			`+#define CALL_BENCH_FUNC_1(v, i) (__extension__ ({{ \\`
			`+ {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), PTRUE()); \\`
			`mx0; }}))`

			`-struct args`
			`+#define CALL_BENCH_FUNC_2(v, i) (__extension__ ({{ \\`
			`+ {rtype} mx0 = {fname}(SV_LOAD (PTRUE(), &variants[v].in->arg0[i * STRIDE]), \\`
			`+ SV_LOAD (PTRUE(), &variants[v].in->arg1[i * STRIDE]), \\`
			`+ PTRUE()); \\`
			`+ mx0; }}))`
			`+`
			`+struct args_1`
			`{{`
			`- {stype} arg0[MAX_STRIDE];`
			`- double timing;`
			`+ {stype} arg0[{nelems}];`
			`+}};`
			`+`
			`+struct args_2`
			`+{{`
			`+ {stype} arg0[{nelems}];`
			`+ {stype} arg1[{nelems}];`
			`}};`

			`struct _variants`
			`{{`
			`const char *name;`
			`- int count;`
			`- const struct args *in;`
			`+ const struct args_{arity} *in;`
			`}};`

			`-static const struct args in0[{rowcount}] = {{`
			`+static const struct args_{arity} in0 = {{`
			`{in_data}`
			`}};`

			`static const struct _variants variants[1] = {{`
			`- {{"", {rowcount}, in0}},`
			`+ {{"", &in0}},`
			`}};`

			`#define NUM_VARIANTS 1`
			`-#define NUM_SAMPLES(i) (variants[i].count)`
			`+#define NUM_SAMPLES(i) ({nelems} / STRIDE)`
			`#define VARIANT(i) (variants[i].name)`

			`// Cannot pass volatile pointer to svst1. This still does not appear to get optimised out.`
			`-static {stype} /*volatile*/ ret[MAX_STRIDE];`
			`+static {stype} /*volatile*/ ret[{rowlen}];`

			`-#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC(i, j)); }})`
			`+#define BENCH_FUNC(i, j) ({{ SV_STORE(PTRUE(), ret, CALL_BENCH_FUNC_{arity}(i, j)); }})`
			`#define FUNCNAME "{fname}"`
			`#include <bench-libmvec-skeleton.c>`
			`"""`
			`@@ -69,23 +78,29 @@ static {stype} /*volatile*/ ret[MAX_STRIDE];`
			`def main(name):`
			`_, prec, _, func = name.split("-")`
			`scalar_to_sve_type = {"double": "svfloat64_t", "float": "svfloat32_t"}`
			`-`
			`stride = {"double": "svcntd()", "float": "svcntw()"}[prec]`
			`rtype = scalar_to_sve_type[prec]`
			`atype = scalar_to_sve_type[prec]`
			`- fname = f"_ZGVsMxv_{func}{'f' if prec == 'float' else ''}"`
			`prec_short = {"double": 64, "float": 32}[prec]`
			`# Max SVE vector length is 2048 bits. To ensure benchmarks are`
			`# vector-length-agnostic, but still use as wide vectors as`
			`# possible on any given target, divide input data into 2048-bit`
			`# rows, then load/store as many elements as the target will allow.`
			`- max_stride = 2048 // prec_short`
			`-`
			`- with open(f"../benchtests/libmvec/{func}-inputs") as f:`
			`- in_vals = [l.strip() for l in f.readlines() if l and not l.startswith("#")]`
			`- in_vals = [in_vals[i:i+max_stride] for i in range(0, len(in_vals), max_stride)]`
			`- rowcount= len(in_vals)`
			`- in_data = ",\n".join("{{" + ", ".join(row) + "}, 0}" for row in in_vals)`
			`+ rowlen = {"double": 32, "float": 64}[prec]`
			`+ input_filename = {"double": f"{func}-inputs", "float": f"{func}f-inputs"}[prec]`
			`+`
			`+ with open(f"../benchtests/libmvec/{input_filename}") as f:`
			`+ input_file = f.readlines()`
			`+ in_vals = (l.strip() for l in input_file if l and not l.startswith("#"))`
			`+ # Split in case of multivariate signature`
			`+ in_vals = (l.split(", ") for l in in_vals)`
			`+ # Transpose`
			`+ in_vals = list(zip(*in_vals))`
			`+ in_data = ",\n".join("{" + (", ".join(val for val in col) + "}")`
			`+ for col in in_vals)`
			`+`
			`+ arity = [l for l in input_file if l.startswith("## args: ")][0].count(prec)`
			`+ fname = f"_ZGVsMx{'v' * arity}_{func}{'f' if prec == 'float' else ''}"`

			`print(TEMPLATE.format(stride=stride,`
			`rtype=rtype,`
			`@@ -93,9 +108,10 @@ def main(name):`
			`fname=fname,`
			`prec_short=prec_short,`
			`in_data=in_data,`
			`- rowcount=rowcount,`
			`stype=prec,`
			`- max_stride=max_stride))`
			`+ rowlen=rowlen,`
			`+ arity=arity,`
			`+ nelems=len(in_vals[0])))`


			`if __name__ == "__main__":`