283 lines
9.6 KiB
Diff
283 lines
9.6 KiB
Diff
From f5d1c336e9276dd5947ef94c9831d9d53673b75b Mon Sep 17 00:00:00 2001
|
|
From: Paul Floyd <pjfloyd@wanadoo.fr>
|
|
Date: Thu, 9 May 2024 21:01:52 +0200
|
|
Subject: [PATCH 05/11] aarch64 frinta and frintn vector instructions
|
|
|
|
The initial fix for Bug 484426 only corrected frinta and frintn
|
|
scalar instructions. This adds support for the vector variants.
|
|
|
|
(cherry picked from commit 7b66a5b58219ac1a4865da8e371edbdb8d765f32)
|
|
---
|
|
NEWS | 1 +
|
|
VEX/priv/guest_arm64_toIR.c | 47 ++++++----
|
|
none/tests/arm64/frinta_frintn.cpp | 141 +++++++++++++++++++++++++++++
|
|
3 files changed, 171 insertions(+), 18 deletions(-)
|
|
|
|
diff --git a/NEWS b/NEWS
|
|
index b65f9206679b..adb52169dd87 100644
|
|
--- a/NEWS
|
|
+++ b/NEWS
|
|
@@ -8,6 +8,7 @@ The following bugs have been fixed or resolved on this branch.
|
|
486180 [MIPS] 'VexGuestArchState' has no member named 'guest_IP_AT_SYSCALL'
|
|
486293 memccpy false positives
|
|
486569 linux inotify_init syscall wrapper missing POST entry in syscall_table
|
|
+n-i-bz aarch64 frinta and frintn vector instructions
|
|
|
|
To see details of a given bug, visit
|
|
https://bugs.kde.org/show_bug.cgi?id=XXXXXX
|
|
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
|
|
index c7e395b4b63d..27d945d6328d 100644
|
|
--- a/VEX/priv/guest_arm64_toIR.c
|
|
+++ b/VEX/priv/guest_arm64_toIR.c
|
|
@@ -13821,46 +13821,57 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
/* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
|
|
/* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
|
|
/* rm plan:
|
|
- FRINTN: tieeven -- !! FIXME KLUDGED !!
|
|
+ FRINTN: tieeven
|
|
FRINTM: -inf
|
|
FRINTP: +inf
|
|
FRINTZ: zero
|
|
- FRINTA: tieaway -- !! FIXME KLUDGED !!
|
|
+ FRINTA: tieaway
|
|
FRINTX: per FPCR + "exact = TRUE"
|
|
FRINTI: per FPCR
|
|
*/
|
|
Bool isD = (size & 1) == 1;
|
|
if (bitQ == 0 && isD) return False; // implied 1d case
|
|
|
|
- IRTemp irrmRM = mk_get_IR_rounding_mode();
|
|
-
|
|
- UChar ch = '?';
|
|
- IRTemp irrm = newTemp(Ity_I32);
|
|
+ UChar ch = '?';
|
|
+ IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
|
|
+ Bool isBinop = True;
|
|
+ IRExpr* irrmE = NULL;
|
|
switch (ix) {
|
|
- case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
|
|
- case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
|
|
- case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
|
|
- case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
|
|
+ case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
|
|
+ case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
|
|
+ case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
|
|
+ case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
|
|
// The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
|
|
- case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
|
|
+ case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
|
|
// I am unsure about the following, due to the "integral exact"
|
|
// description in the manual. What does it mean? (frintx, that is)
|
|
- case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
|
|
- case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
|
|
+ case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
|
|
+ case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
|
|
default: vassert(0);
|
|
}
|
|
|
|
- IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
|
|
if (isD) {
|
|
for (UInt i = 0; i < 2; i++) {
|
|
- putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
|
|
- getQRegLane(nn, i, Ity_F64)));
|
|
+ if (isBinop) {
|
|
+ IRTemp irrm = newTemp(Ity_I32);
|
|
+ assign(irrm, irrmE);
|
|
+ putQRegLane(dd, i, binop(op, mkexpr(irrm),
|
|
+ getQRegLane(nn, i, Ity_F64)));
|
|
+ } else {
|
|
+ putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
|
|
+ }
|
|
}
|
|
} else {
|
|
UInt n = bitQ==1 ? 4 : 2;
|
|
for (UInt i = 0; i < n; i++) {
|
|
- putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
|
|
- getQRegLane(nn, i, Ity_F32)));
|
|
+ if (isBinop) {
|
|
+ IRTemp irrm = newTemp(Ity_I32);
|
|
+ assign(irrm, irrmE);
|
|
+ putQRegLane(dd, i, binop(op, mkexpr(irrm),
|
|
+ getQRegLane(nn, i, Ity_F32)));
|
|
+ } else {
|
|
+ putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
|
|
+ }
|
|
}
|
|
if (bitQ == 0)
|
|
putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
|
|
diff --git a/none/tests/arm64/frinta_frintn.cpp b/none/tests/arm64/frinta_frintn.cpp
|
|
index 8e13761eb966..c0803688f698 100644
|
|
--- a/none/tests/arm64/frinta_frintn.cpp
|
|
+++ b/none/tests/arm64/frinta_frintn.cpp
|
|
@@ -36,6 +36,55 @@ void test_frinta(T input, T expected)
|
|
}
|
|
}
|
|
|
|
+template<typename T>
|
|
+void test_frinta_fullvec(T* input, T* expected)
|
|
+{
|
|
+ T result[2*sizeof(double)/sizeof(T)];
|
|
+ T* rp = result;
|
|
+ if constexpr (std::is_same_v<double, T> == true)
|
|
+ {
|
|
+ __asm__ __volatile__(
|
|
+ "ldr q23, [%1];\n"
|
|
+ "frinta v22.2d, v23.2d;\n"
|
|
+ "str q22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ __asm__ __volatile__(
|
|
+ "ldr q23, [%1];\n"
|
|
+ "frinta v22.4s, v23.4s;\n"
|
|
+ "str q22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+ assert(result[2] == expected[2]);
|
|
+ assert(result[3] == expected[3]);
|
|
+ }
|
|
+}
|
|
+
|
|
+void test_frinta_halfvec(float* input, float* expected)
|
|
+{
|
|
+ float result[2];
|
|
+ float* rp = result;
|
|
+ __asm__ __volatile__(
|
|
+ "ldr d23, [%1];\n"
|
|
+ "frinta v22.2s, v23.2s;\n"
|
|
+ "str d22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+}
|
|
+
|
|
+
|
|
template<typename T>
|
|
void test_frintn(T input, T expected)
|
|
{
|
|
@@ -66,6 +115,54 @@ void test_frintn(T input, T expected)
|
|
}
|
|
}
|
|
|
|
+template<typename T>
|
|
+void test_frintn_fullvec(T* input, T* expected)
|
|
+{
|
|
+ T result[2*sizeof(double)/sizeof(T)];
|
|
+ T* rp = result;
|
|
+ if constexpr (std::is_same_v<double, T> == true)
|
|
+ {
|
|
+ __asm__ __volatile__(
|
|
+ "ldr q23, [%1];\n"
|
|
+ "frintn v22.2d, v23.2d;\n"
|
|
+ "str q22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ __asm__ __volatile__(
|
|
+ "ldr q23, [%1];\n"
|
|
+ "frintn v22.4s, v23.4s;\n"
|
|
+ "str q22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+ assert(result[2] == expected[2]);
|
|
+ assert(result[3] == expected[3]);
|
|
+ }
|
|
+}
|
|
+
|
|
+void test_frintn_halfvec(float* input, float* expected)
|
|
+{
|
|
+ float result[2];
|
|
+ float* rp = result;
|
|
+ __asm__ __volatile__(
|
|
+ "ldr d23, [%1];\n"
|
|
+ "frintn v22.2s, v23.2s;\n"
|
|
+ "str d22, [%0];\n"
|
|
+ : "+rm" (rp)
|
|
+ : "r" (input)
|
|
+ : "memory", "v22", "v23");
|
|
+ assert(result[0] == expected[0]);
|
|
+ assert(result[1] == expected[1]);
|
|
+}
|
|
+
|
|
int main()
|
|
{
|
|
// round "away from zero"
|
|
@@ -78,6 +175,36 @@ int main()
|
|
test_frinta(-1.5F, -2.0F);
|
|
test_frinta(-2.5F, -3.0F);
|
|
|
|
+ double in1[] = {1.5, 1.5};
|
|
+ double out1[] = {2.0, 2.0};
|
|
+ test_frinta_fullvec(in1, out1);
|
|
+ double in2[] = {2.5, 2.5};
|
|
+ double out2[] = {3.0, 3.0};
|
|
+ test_frinta_fullvec(in2, out2);
|
|
+ double in3[] = {-1.5, -1.5};
|
|
+ double out3[] = {-2.0, -2.0};
|
|
+ test_frinta_fullvec(in3, out3);
|
|
+ double in4[] = {-2.5, -2.5};
|
|
+ double out4[] = {-3.0, -3.0};
|
|
+ test_frinta_fullvec(in4, out4);
|
|
+
|
|
+ float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F};
|
|
+ float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F};
|
|
+ test_frinta_fullvec(in1f, out1f);
|
|
+ test_frinta_halfvec(in1f, out1f);
|
|
+ float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F};
|
|
+ float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F};
|
|
+ test_frinta_fullvec(in2f, out2f);
|
|
+ test_frinta_halfvec(in2f, out2f);
|
|
+ float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F};
|
|
+ float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F};
|
|
+ test_frinta_fullvec(in3f, out3f);
|
|
+ test_frinta_halfvec(in3f, out3f);
|
|
+ float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F};
|
|
+ float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F};
|
|
+ test_frinta_fullvec(in4f, out4f);
|
|
+ test_frinta_halfvec(in4f, out4f);
|
|
+
|
|
// round "to even"
|
|
test_frintn(1.5, 2.0);
|
|
test_frintn(2.5, 2.0);
|
|
@@ -87,5 +214,19 @@ int main()
|
|
test_frintn(2.5F, 2.0F);
|
|
test_frintn(-1.5F, -2.0F);
|
|
test_frintn(-2.5F, -2.0F);
|
|
+
|
|
+ test_frintn_fullvec(in1, out1);
|
|
+ test_frintn_fullvec(in2, out1);
|
|
+ test_frintn_fullvec(in3, out3);
|
|
+ test_frintn_fullvec(in4, out3);
|
|
+
|
|
+ test_frintn_fullvec(in1f, out1f);
|
|
+ test_frintn_halfvec(in1f, out1f);
|
|
+ test_frintn_fullvec(in2f, out1f);
|
|
+ test_frintn_halfvec(in2f, out1f);
|
|
+ test_frintn_fullvec(in3f, out3f);
|
|
+ test_frintn_halfvec(in3f, out3f);
|
|
+ test_frintn_fullvec(in4f, out3f);
|
|
+ test_frintn_halfvec(in4f, out3f);
|
|
}
|
|
|
|
--
|
|
2.45.2
|
|
|