From f5d1c336e9276dd5947ef94c9831d9d53673b75b Mon Sep 17 00:00:00 2001
From: Paul Floyd
Date: Thu, 9 May 2024 21:01:52 +0200
Subject: [PATCH 05/11] aarch64 frinta and frintn vector instructions

The initial fix for Bug 484426 only corrected frinta and frintn scalar
instructions. This adds support for the vector variants.

(cherry picked from commit 7b66a5b58219ac1a4865da8e371edbdb8d765f32)
---
Reviewer notes (this section is ignored by git am):
 - Line breaks and indentation were reconstructed from a
   whitespace-collapsed copy of this patch, so context whitespace may not
   match the tree exactly; apply with --ignore-whitespace.
 - The author e-mail address was missing from that copy and has to be
   restored in the From: header before running git am.
 - Changes relative to the cherry-picked commit: "frinta and frinta" ->
   "frinta and frintn" in the subject and NEWS entry; the stale "kludge"
   comment on case 5 is replaced; the "2,0" / "3,0" initialisers in the
   test are fixed; the asm "+rm" constraints become "+r" because the
   operand is dereferenced as [%0] and therefore must live in a register.

 NEWS                               |   1 +
 VEX/priv/guest_arm64_toIR.c        |  49 ++++++----
 none/tests/arm64/frinta_frintn.cpp | 141 +++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 19 deletions(-)

diff --git a/NEWS b/NEWS
index b65f9206679b..adb52169dd87 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ The following bugs have been fixed or resolved on this branch.
 486180  [MIPS] 'VexGuestArchState' has no member named 'guest_IP_AT_SYSCALL'
 486293  memccpy false positives
 486569  linux inotify_init syscall wrapper missing POST entry in syscall_table
+n-i-bz  aarch64 frinta and frintn vector instructions
 
 To see details of a given bug, visit
   https://bugs.kde.org/show_bug.cgi?id=XXXXXX
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index c7e395b4b63d..27d945d6328d 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -13821,46 +13821,57 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
       /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
       /* rm plan:
-         FRINTN: tieeven -- !! FIXME KLUDGED !!
+         FRINTN: tieeven
          FRINTM: -inf
          FRINTP: +inf
          FRINTZ: zero
-         FRINTA: tieaway -- !! FIXME KLUDGED !!
+         FRINTA: tieaway
          FRINTX: per FPCR + "exact = TRUE"
          FRINTI: per FPCR
       */
       Bool isD = (size & 1) == 1;
       if (bitQ == 0 && isD) return False; // implied 1d case
 
-      IRTemp irrmRM = mk_get_IR_rounding_mode();
-
-      UChar ch = '?';
-      IRTemp irrm = newTemp(Ity_I32);
+      UChar ch = '?';
+      IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+      Bool isBinop = True;
+      IRExpr* irrmE = NULL;
       switch (ix) {
-         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
-         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
-         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
-         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
+         case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
+         case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
+         case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+         case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
-         // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
-         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+         // Ties-away uses the unary RoundF{64,32}toIntA0 op; no rounding-mode operand.
+         case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
          // I am unsure about the following, due to the "integral exact"
          // description in the manual. What does it mean? (frintx, that is)
-         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
-         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
+         case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
+         case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
          default: vassert(0);
       }
 
-      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
       if (isD) {
          for (UInt i = 0; i < 2; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                     getQRegLane(nn, i, Ity_F64)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                        getQRegLane(nn, i, Ity_F64)));
+            } else {
+               putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
+            }
          }
       } else {
          UInt n = bitQ==1 ? 4 : 2;
          for (UInt i = 0; i < n; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                     getQRegLane(nn, i, Ity_F32)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                        getQRegLane(nn, i, Ity_F32)));
+            } else {
+               putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
+            }
          }
          if (bitQ == 0)
             putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
diff --git a/none/tests/arm64/frinta_frintn.cpp b/none/tests/arm64/frinta_frintn.cpp
index 8e13761eb966..c0803688f698 100644
--- a/none/tests/arm64/frinta_frintn.cpp
+++ b/none/tests/arm64/frinta_frintn.cpp
@@ -36,6 +36,55 @@ void test_frinta(T input, T expected)
    }
 }
 
+template<typename T>
+void test_frinta_fullvec(T* input, T* expected)
+{
+   T result[2*sizeof(double)/sizeof(T)];
+   T* rp = result;
+   if constexpr (std::is_same_v<double, T> == true)
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frinta v22.2d, v23.2d;\n"
+         "str q22, [%0];\n"
+         : "+r" (rp)
+         : "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+   }
+   else
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frinta v22.4s, v23.4s;\n"
+         "str q22, [%0];\n"
+         : "+r" (rp)
+         : "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+      assert(result[2] == expected[2]);
+      assert(result[3] == expected[3]);
+   }
+}
+
+void test_frinta_halfvec(float* input, float* expected)
+{
+   float result[2];
+   float* rp = result;
+   __asm__ __volatile__(
+      "ldr d23, [%1];\n"
+      "frinta v22.2s, v23.2s;\n"
+      "str d22, [%0];\n"
+      : "+r" (rp)
+      : "r" (input)
+      : "memory", "v22", "v23");
+   assert(result[0] == expected[0]);
+   assert(result[1] == expected[1]);
+}
+
+
 template<typename T>
 void test_frintn(T input, T expected)
 {
@@ -66,6 +115,54 @@ void test_frintn(T input, T expected)
    }
 }
 
+template<typename T>
+void test_frintn_fullvec(T* input, T* expected)
+{
+   T result[2*sizeof(double)/sizeof(T)];
+   T* rp = result;
+   if constexpr (std::is_same_v<double, T> == true)
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frintn v22.2d, v23.2d;\n"
+         "str q22, [%0];\n"
+         : "+r" (rp)
+         : "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+   }
+   else
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frintn v22.4s, v23.4s;\n"
+         "str q22, [%0];\n"
+         : "+r" (rp)
+         : "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+      assert(result[2] == expected[2]);
+      assert(result[3] == expected[3]);
+   }
+}
+
+void test_frintn_halfvec(float* input, float* expected)
+{
+   float result[2];
+   float* rp = result;
+   __asm__ __volatile__(
+      "ldr d23, [%1];\n"
+      "frintn v22.2s, v23.2s;\n"
+      "str d22, [%0];\n"
+      : "+r" (rp)
+      : "r" (input)
+      : "memory", "v22", "v23");
+   assert(result[0] == expected[0]);
+   assert(result[1] == expected[1]);
+}
+
 int main()
 {
    // round "away from zero"
@@ -78,6 +175,36 @@ int main()
    test_frinta(-1.5F, -2.0F);
    test_frinta(-2.5F, -3.0F);
 
+   double in1[] = {1.5, 1.5};
+   double out1[] = {2.0, 2.0};
+   test_frinta_fullvec(in1, out1);
+   double in2[] = {2.5, 2.5};
+   double out2[] = {3.0, 3.0};
+   test_frinta_fullvec(in2, out2);
+   double in3[] = {-1.5, -1.5};
+   double out3[] = {-2.0, -2.0};
+   test_frinta_fullvec(in3, out3);
+   double in4[] = {-2.5, -2.5};
+   double out4[] = {-3.0, -3.0};
+   test_frinta_fullvec(in4, out4);
+
+   float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F};
+   float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F};
+   test_frinta_fullvec(in1f, out1f);
+   test_frinta_halfvec(in1f, out1f);
+   float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F};
+   float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F};
+   test_frinta_fullvec(in2f, out2f);
+   test_frinta_halfvec(in2f, out2f);
+   float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F};
+   float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F};
+   test_frinta_fullvec(in3f, out3f);
+   test_frinta_halfvec(in3f, out3f);
+   float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F};
+   float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F};
+   test_frinta_fullvec(in4f, out4f);
+   test_frinta_halfvec(in4f, out4f);
+
    // round "to even"
    test_frintn(1.5, 2.0);
    test_frintn(2.5, 2.0);
@@ -87,5 +214,19 @@ int main()
    test_frintn(2.5F, 2.0F);
    test_frintn(-1.5F, -2.0F);
    test_frintn(-2.5F, -2.0F);
+
+   test_frintn_fullvec(in1, out1);
+   test_frintn_fullvec(in2, out1);
+   test_frintn_fullvec(in3, out3);
+   test_frintn_fullvec(in4, out3);
+
+   test_frintn_fullvec(in1f, out1f);
+   test_frintn_halfvec(in1f, out1f);
+   test_frintn_fullvec(in2f, out1f);
+   test_frintn_halfvec(in2f, out1f);
+   test_frintn_fullvec(in3f, out3f);
+   test_frintn_halfvec(in3f, out3f);
+   test_frintn_fullvec(in4f, out3f);
+   test_frintn_halfvec(in4f, out3f);
 }
 
-- 
2.45.2