717 lines
26 KiB
Diff
717 lines
26 KiB
Diff
From 04cdc29b007594a0e58ffef0c9dd87df3ea595ea Mon Sep 17 00:00:00 2001
|
|
From: Mark Wielaard <mark@klomp.org>
|
|
Date: Wed, 14 Oct 2020 06:11:34 -0400
|
|
Subject: [PATCH] arm64 VEX frontend and backend support for
|
|
Iop_M{Add,Sub}F{32,64}
|
|
|
|
The arm64 frontend used to implement the scalar fmadd, fmsub, fnmadd
|
|
and fnmsub instructions as separate addition/subtraction and
|
|
multiplication instructions, which caused rounding issues.
|
|
|
|
This patch turns them into Iop_M{Add,Sub}F{32,64} instructions
|
|
(with some arguments negated). And the backend now emits fmadd or fmsub
|
|
instructions.
|
|
|
|
Alexandra Hajkova <ahajkova@redhat.com> added tests and fixed up the
|
|
implementation to make sure rounding (and sign) are correct now.
|
|
|
|
https://bugs.kde.org/show_bug.cgi?id=426014
|
|
---
|
|
VEX/priv/guest_arm64_toIR.c | 58 ++++++++---
|
|
VEX/priv/host_arm64_defs.c | 136 +++++++++++++++++++++++++-
|
|
VEX/priv/host_arm64_defs.h | 30 ++++++
|
|
VEX/priv/host_arm64_isel.c | 39 ++++++++
|
|
none/tests/arm64/Makefile.am | 6 +-
|
|
none/tests/arm64/fmadd_sub.c | 98 +++++++++++++++++++
|
|
none/tests/arm64/fmadd_sub.stderr.exp | 0
|
|
none/tests/arm64/fmadd_sub.stdout.exp | 125 +++++++++++++++++++++++
|
|
none/tests/arm64/fmadd_sub.vgtest | 3 +
|
|
9 files changed, 479 insertions(+), 16 deletions(-)
|
|
create mode 100644 none/tests/arm64/fmadd_sub.c
|
|
create mode 100644 none/tests/arm64/fmadd_sub.stderr.exp
|
|
create mode 100644 none/tests/arm64/fmadd_sub.stdout.exp
|
|
create mode 100644 none/tests/arm64/fmadd_sub.vgtest
|
|
|
|
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
|
|
index 556b85a6a..d242d43c0 100644
|
|
--- a/VEX/priv/guest_arm64_toIR.c
|
|
+++ b/VEX/priv/guest_arm64_toIR.c
|
|
@@ -286,6 +286,12 @@ static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
|
|
return IRExpr_Triop(op, a1, a2, a3);
|
|
}
|
|
|
|
+static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2,
|
|
+ IRExpr* a3, IRExpr* a4 )
|
|
+{ /* Shorthand for building a four-operand (Qop) IR expression. */
|
|
+ return IRExpr_Qop(op, a1, a2, a3, a4);
|
|
+}
|
|
+
|
|
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
|
|
{
|
|
return IRExpr_Load(Iend_LE, ty, addr);
|
|
@@ -532,6 +538,22 @@ static IROp mkADDF ( IRType ty ) {
|
|
}
|
|
}
|
|
|
|
+static IROp mkFMADDF ( IRType ty ) { /* Pick the multiply-add IROp matching FP type |ty|. */
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_MAddF32;
|
|
+ case Ity_F64: return Iop_MAddF64;
|
|
+ default: vpanic("mkFMADDF"); /* any type other than F32/F64 is a caller bug */
|
|
+ }
|
|
+}
|
|
+
|
|
+static IROp mkFMSUBF ( IRType ty ) { /* Pick the multiply-subtract IROp matching FP type |ty|. */
|
|
+ switch (ty) {
|
|
+ case Ity_F32: return Iop_MSubF32;
|
|
+ case Ity_F64: return Iop_MSubF64;
|
|
+ default: vpanic("mkFMSUBF"); /* any type other than F32/F64 is a caller bug */
|
|
+ }
|
|
+}
|
|
+
|
|
static IROp mkSUBF ( IRType ty ) {
|
|
switch (ty) {
|
|
case Ity_F32: return Iop_SubF32;
|
|
@@ -14368,30 +14390,40 @@ Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
where Fx=Dx when sz=1, Fx=Sx when sz=0
|
|
|
|
-----SPEC------ ----IMPL----
|
|
- fmadd a + n * m a + n * m
|
|
- fmsub a + (-n) * m a - n * m
|
|
- fnmadd (-a) + (-n) * m -(a + n * m)
|
|
- fnmsub (-a) + n * m -(a - n * m)
|
|
+ fmadd a + n * m fmadd (a, n, m)
|
|
+ fmsub a + (-n) * m fmsub (a, n, m)
|
|
+ fnmadd (-a) + (-n) * m fmadd (-a, -n, m)
|
|
+ fnmsub (-a) + n * m fmadd (-a, n, m)
|
|
+
|
|
+ Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A
|
|
*/
|
|
Bool isD = (ty & 1) == 1;
|
|
UInt ix = (bitO1 << 1) | bitO0;
|
|
IRType ity = isD ? Ity_F64 : Ity_F32;
|
|
- IROp opADD = mkADDF(ity);
|
|
- IROp opSUB = mkSUBF(ity);
|
|
- IROp opMUL = mkMULF(ity);
|
|
+ IROp opFMADD = mkFMADDF(ity);
|
|
+ IROp opFMSUB = mkFMSUBF(ity);
|
|
IROp opNEG = mkNEGF(ity);
|
|
IRTemp res = newTemp(ity);
|
|
IRExpr* eA = getQRegLO(aa, ity);
|
|
IRExpr* eN = getQRegLO(nn, ity);
|
|
IRExpr* eM = getQRegLO(mm, ity);
|
|
IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
|
|
- IRExpr* eNxM = triop(opMUL, rm, eN, eM);
|
|
switch (ix) {
|
|
- case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
|
|
- case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
|
|
- case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
|
|
- case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
|
|
- default: vassert(0);
|
|
+ case 0: /* FMADD */
|
|
+ assign(res, qop(opFMADD, rm, eN, eM, eA));
|
|
+ break;
|
|
+ case 1: /* FMSUB */
|
|
+ assign(res, qop(opFMSUB, rm, eN, eM, eA));
|
|
+ break;
|
|
+ case 2: /* FNMADD */
|
|
+ assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM,
|
|
+ unop(opNEG,eA)));
|
|
+ break;
|
|
+ case 3: /* FNMSUB */
|
|
+ assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA)));
|
|
+ break;
|
|
+ default:
|
|
+ vassert(0);
|
|
}
|
|
putQReg128(dd, mkV128(0x0000));
|
|
putQRegLO(dd, mkexpr(res));
|
|
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
|
|
index e4ef56986..13b497f60 100644
|
|
--- a/VEX/priv/host_arm64_defs.c
|
|
+++ b/VEX/priv/host_arm64_defs.c
|
|
@@ -546,6 +546,14 @@ static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
|
|
}
|
|
}
|
|
|
|
+static const HChar* showARM64FpTriOp ( ARM64FpTriOp op ) { /* Mnemonic stem for a ternary FP op. */
|
|
+ switch (op) { /* no leading "f" here: ppARM64Instr prints these via "f%s" */
|
|
+ case ARM64fpt_FMADD: return "madd";
|
|
+ case ARM64fpt_FMSUB: return "msub";
|
|
+ default: vpanic("showARM64FpTriOp");
|
|
+ }
|
|
+}
|
|
+
|
|
static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
|
|
switch (op) {
|
|
case ARM64fpu_NEG: return "neg ";
|
|
@@ -1154,6 +1162,28 @@ ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
|
|
i->ARM64in.VBinS.argR = argR;
|
|
return i;
|
|
}
|
|
+ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, /* Construct a 64-bit FP ternary instruction node. */
|
|
+ HReg dst, HReg arg1, HReg arg2, HReg arg3 ) {
|
|
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
|
|
+ i->tag = ARM64in_VTriD;
|
|
+ i->ARM64in.VTriD.op = op;
|
|
+ i->ARM64in.VTriD.dst = dst;
|
|
+ i->ARM64in.VTriD.arg1 = arg1; /* the emitter encodes arg1/arg2/arg3 as the n/m/a register fields */
|
|
+ i->ARM64in.VTriD.arg2 = arg2;
|
|
+ i->ARM64in.VTriD.arg3 = arg3;
|
|
+ return i;
|
|
+}
|
|
+ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, /* Construct a 32-bit FP ternary instruction node. */
|
|
+ HReg dst, HReg arg1, HReg arg2, HReg arg3 ) {
|
|
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
|
|
+ i->tag = ARM64in_VTriS;
|
|
+ i->ARM64in.VTriS.op = op;
|
|
+ i->ARM64in.VTriS.dst = dst;
|
|
+ i->ARM64in.VTriS.arg1 = arg1; /* the emitter encodes arg1/arg2/arg3 as the n/m/a register fields */
|
|
+ i->ARM64in.VTriS.arg2 = arg2;
|
|
+ i->ARM64in.VTriS.arg3 = arg3;
|
|
+ return i;
|
|
+}
|
|
ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
|
|
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
|
|
i->tag = ARM64in_VCmpD;
|
|
@@ -1756,6 +1786,26 @@ void ppARM64Instr ( const ARM64Instr* i ) {
|
|
vex_printf(", ");
|
|
ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
|
|
return;
|
|
+ case ARM64in_VTriD:
|
|
+ vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriD.op));
|
|
+ ppHRegARM64(i->ARM64in.VTriD.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VTriD.arg1);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VTriD.arg2);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64(i->ARM64in.VTriD.arg3);
|
|
+ return;
|
|
+ case ARM64in_VTriS:
|
|
+ vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriS.op));
|
|
+ ppHRegARM64asSreg(i->ARM64in.VTriS.dst);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VTriS.arg1);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VTriS.arg2);
|
|
+ vex_printf(", ");
|
|
+ ppHRegARM64asSreg(i->ARM64in.VTriS.arg3);
|
|
+ return;
|
|
case ARM64in_VCmpD:
|
|
vex_printf("fcmp ");
|
|
ppHRegARM64(i->ARM64in.VCmpD.argL);
|
|
@@ -2197,6 +2247,18 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
|
|
addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
|
|
addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
|
|
return;
|
|
+ case ARM64in_VTriD:
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VTriD.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg1);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg2);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg3);
|
|
+ return;
|
|
+ case ARM64in_VTriS:
|
|
+ addHRegUse(u, HRmWrite, i->ARM64in.VTriS.dst);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg1);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg2);
|
|
+ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg3);
|
|
+ return;
|
|
case ARM64in_VCmpD:
|
|
addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
|
|
addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
|
|
@@ -2454,6 +2516,18 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
|
|
i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
|
|
i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
|
|
return;
|
|
+ case ARM64in_VTriD:
|
|
+ i->ARM64in.VTriD.dst = lookupHRegRemap(m, i->ARM64in.VTriD.dst);
|
|
+ i->ARM64in.VTriD.arg1 = lookupHRegRemap(m, i->ARM64in.VTriD.arg1);
|
|
+ i->ARM64in.VTriD.arg2 = lookupHRegRemap(m, i->ARM64in.VTriD.arg2);
|
|
+ i->ARM64in.VTriD.arg3 = lookupHRegRemap(m, i->ARM64in.VTriD.arg3);
|
|
+ return;
|
|
+ case ARM64in_VTriS:
|
|
+ i->ARM64in.VTriS.dst = lookupHRegRemap(m, i->ARM64in.VTriS.dst);
|
|
+ i->ARM64in.VTriS.arg1 = lookupHRegRemap(m, i->ARM64in.VTriS.arg1);
|
|
+ i->ARM64in.VTriS.arg2 = lookupHRegRemap(m, i->ARM64in.VTriS.arg2);
|
|
+ i->ARM64in.VTriS.arg3 = lookupHRegRemap(m, i->ARM64in.VTriS.arg3);
|
|
+ return;
|
|
case ARM64in_VCmpD:
|
|
i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
|
|
i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
|
|
@@ -2812,7 +2886,8 @@ static inline UInt qregEnc ( HReg r )
|
|
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
|
|
#define X11110101 BITS8(1,1,1,1,0,1,0,1)
|
|
#define X11110111 BITS8(1,1,1,1,0,1,1,1)
|
|
-
|
|
+#define X11111000 BITS8(1,1,1,1,1,0,0,0)
|
|
+#define X11111010 BITS8(1,1,1,1,1,0,1,0)
|
|
|
|
/* --- 4 fields --- */
|
|
|
|
@@ -2972,6 +3047,27 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
|
|
}
|
|
|
|
|
|
+static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
|
|
+ UInt f5, UInt f6, UInt f7 ) { /* Pack 7 fields of 3,8,5,1,5,5,5 bits into one 32-bit word. */
|
|
+ vassert(3+8+5+1+5+5+5 == 32); /* the field widths must fill the word exactly */
|
|
+ vassert(f1 < (1<<3)); /* each field must fit in its declared width */
|
|
+ vassert(f2 < (1<<8));
|
|
+ vassert(f3 < (1<<5));
|
|
+ vassert(f4 < (1<<1));
|
|
+ vassert(f5 < (1<<5));
|
|
+ vassert(f6 < (1<<5));
|
|
+ vassert(f7 < (1<<5));
|
|
+ UInt w = 0;
|
|
+ w = (w << 3) | f1; /* f1 ends up in the most significant bits, f7 in the least */
|
|
+ w = (w << 8) | f2;
|
|
+ w = (w << 5) | f3;
|
|
+ w = (w << 1) | f4;
|
|
+ w = (w << 5) | f5;
|
|
+ w = (w << 5) | f6;
|
|
+ w = (w << 5) | f7;
|
|
+ return w;
|
|
+}
|
|
+
|
|
//ZZ #define X0000 BITS4(0,0,0,0)
|
|
//ZZ #define X0001 BITS4(0,0,0,1)
|
|
//ZZ #define X0010 BITS4(0,0,1,0)
|
|
@@ -4339,6 +4435,44 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
= X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
|
|
goto done;
|
|
}
|
|
+ case ARM64in_VTriD: {
|
|
+ /* 31 20 15 14 9 4
|
|
+ 000 11111 010 m 0 a n d FMADD Dd,Dn,Dm,Da
|
|
+ ---------------- 1 ------ FMSUB -----------
|
|
+ */
|
|
+ UInt dD = dregEnc(i->ARM64in.VTriD.dst);
|
|
+ UInt dN = dregEnc(i->ARM64in.VTriD.arg1);
|
|
+ UInt dM = dregEnc(i->ARM64in.VTriD.arg2);
|
|
+ UInt dA = dregEnc(i->ARM64in.VTriD.arg3);
|
|
+ UInt b15 = 2; /* impossible */
|
|
+ switch (i->ARM64in.VTriD.op) {
|
|
+ case ARM64fpt_FMADD: b15 = 0; break;
|
|
+ case ARM64fpt_FMSUB: b15 = 1; break;
|
|
+ default: goto bad;
|
|
+ }
|
|
+ vassert(b15 < 2);
|
|
+ *p++ = X_3_8_5_1_5_5_5(X000, X11111010, dM, b15, dA, dN, dD);
|
|
+ goto done;
|
|
+ }
|
|
+ case ARM64in_VTriS: {
|
|
+ /* 31 20 15 14 9 4
|
|
+ 000 11111 000 m 0 a n d FMADD Sd,Sn,Sm,Sa
|
|
+ ---------------- 1 ------ FMSUB -----------
|
|
+ */
|
|
+ UInt sD = dregEnc(i->ARM64in.VTriS.dst); /* read the VTriS member that matches this tag */
|
|
+ UInt sN = dregEnc(i->ARM64in.VTriS.arg1);
|
|
+ UInt sM = dregEnc(i->ARM64in.VTriS.arg2);
|
|
+ UInt sA = dregEnc(i->ARM64in.VTriS.arg3);
|
|
+ UInt b15 = 2; /* impossible */
|
|
+ switch (i->ARM64in.VTriS.op) {
|
|
+ case ARM64fpt_FMADD: b15 = 0; break;
|
|
+ case ARM64fpt_FMSUB: b15 = 1; break;
|
|
+ default: goto bad;
|
|
+ }
|
|
+ vassert(b15 < 2);
|
|
+ *p++ = X_3_8_5_1_5_5_5(X000, X11111000, sM, b15, sA, sN, sD); /* field order: m, o0, a, n, d */
|
|
+ goto done;
|
|
+ }
|
|
case ARM64in_VCmpD: {
|
|
/* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
|
|
UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
|
|
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
|
|
index 05dba7ab8..5a82564ce 100644
|
|
--- a/VEX/priv/host_arm64_defs.h
|
|
+++ b/VEX/priv/host_arm64_defs.h
|
|
@@ -289,6 +289,14 @@ typedef
|
|
}
|
|
ARM64FpBinOp;
|
|
|
|
+typedef
|
|
+ enum { /* FP ternary (three-source) operations carried by VTriD/VTriS */
|
|
+ ARM64fpt_FMADD=105,
|
|
+ ARM64fpt_FMSUB,
|
|
+ ARM64fpt_INVALID /* sentinel: instruction selection starts from this and checks for it */
|
|
+ }
|
|
+ ARM64FpTriOp;
|
|
+
|
|
typedef
|
|
enum {
|
|
ARM64fpu_NEG=110,
|
|
@@ -498,6 +506,8 @@ typedef
|
|
ARM64in_VUnaryS,
|
|
ARM64in_VBinD,
|
|
ARM64in_VBinS,
|
|
+ ARM64in_VTriD,
|
|
+ ARM64in_VTriS,
|
|
ARM64in_VCmpD,
|
|
ARM64in_VCmpS,
|
|
ARM64in_VFCSel,
|
|
@@ -799,6 +809,22 @@ typedef
|
|
HReg argL;
|
|
HReg argR;
|
|
} VBinS;
|
|
+ /* 64-bit FP ternary arithmetic */
|
|
+ struct {
|
|
+ ARM64FpTriOp op;
|
|
+ HReg dst;
|
|
+ HReg arg1;
|
|
+ HReg arg2;
|
|
+ HReg arg3;
|
|
+ } VTriD;
|
|
+ /* 32-bit FP ternary arithmetic */
|
|
+ struct {
|
|
+ ARM64FpTriOp op;
|
|
+ HReg dst;
|
|
+ HReg arg1;
|
|
+ HReg arg2;
|
|
+ HReg arg3;
|
|
+ } VTriS;
|
|
/* 64-bit FP compare */
|
|
struct {
|
|
HReg argL;
|
|
@@ -970,6 +996,10 @@ extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src );
|
|
extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src );
|
|
extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg );
|
|
extern ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg, HReg, HReg );
|
|
+extern ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, HReg dst,
|
|
+ HReg, HReg, HReg );
|
|
+extern ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, HReg dst,
|
|
+ HReg, HReg, HReg );
|
|
extern ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR );
|
|
extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR );
|
|
extern ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
|
|
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
|
|
index 2f19eab81..da1218715 100644
|
|
--- a/VEX/priv/host_arm64_isel.c
|
|
+++ b/VEX/priv/host_arm64_isel.c
|
|
@@ -3255,6 +3255,25 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
|
|
}
|
|
}
|
|
|
|
+ if (e->tag == Iex_Qop) {
|
|
+ IRQop* qop = e->Iex.Qop.details;
|
|
+ ARM64FpTriOp triop = ARM64fpt_INVALID;
|
|
+ switch (qop->op) {
|
|
+ case Iop_MAddF64: triop = ARM64fpt_FMADD; break;
|
|
+ case Iop_MSubF64: triop = ARM64fpt_FMSUB; break;
|
|
+ default: break;
|
|
+ }
|
|
+ if (triop != ARM64fpt_INVALID) {
|
|
+ HReg N = iselDblExpr(env, qop->arg2);
|
|
+ HReg M = iselDblExpr(env, qop->arg3);
|
|
+ HReg A = iselDblExpr(env, qop->arg4);
|
|
+ HReg dst = newVRegD(env);
|
|
+ set_FPCR_rounding_mode(env, qop->arg1);
|
|
+ addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A));
|
|
+ return dst;
|
|
+ }
|
|
+ }
|
|
+
|
|
if (e->tag == Iex_ITE) {
|
|
/* ITE(ccexpr, iftrue, iffalse) */
|
|
ARM64CondCode cc;
|
|
@@ -3450,6 +3469,26 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
|
|
return dst;
|
|
}
|
|
|
|
+ if (e->tag == Iex_Qop) {
|
|
+ IRQop* qop = e->Iex.Qop.details;
|
|
+ ARM64FpTriOp triop = ARM64fpt_INVALID;
|
|
+ switch (qop->op) {
|
|
+ case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
|
|
+ case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
|
|
+ default: break;
|
|
+ }
|
|
+
|
|
+ if (triop != ARM64fpt_INVALID) {
|
|
+ HReg N = iselFltExpr(env, qop->arg2);
|
|
+ HReg M = iselFltExpr(env, qop->arg3);
|
|
+ HReg A = iselFltExpr(env, qop->arg4);
|
|
+ HReg dst = newVRegD(env);
|
|
+ set_FPCR_rounding_mode(env, qop->arg1);
|
|
+ addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
|
|
+ return dst;
|
|
+ }
|
|
+ }
|
|
+
|
|
ppIRExpr(e);
|
|
vpanic("iselFltExpr_wrk");
|
|
}
|
|
diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
|
|
index 7b3ebbdca..4ecab36ad 100644
|
|
--- a/none/tests/arm64/Makefile.am
|
|
+++ b/none/tests/arm64/Makefile.am
|
|
@@ -10,14 +10,16 @@ EXTRA_DIST = \
|
|
integer.stdout.exp integer.stderr.exp integer.vgtest \
|
|
memory.stdout.exp memory.stderr.exp memory.vgtest \
|
|
atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
|
|
- simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest
|
|
+ simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \
|
|
+ fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest
|
|
|
|
check_PROGRAMS = \
|
|
allexec \
|
|
cvtf_imm \
|
|
fp_and_simd \
|
|
integer \
|
|
- memory
|
|
+ memory \
|
|
+ fmadd_sub
|
|
|
|
if BUILD_ARMV8_CRC_TESTS
|
|
check_PROGRAMS += crc32
|
|
diff --git a/none/tests/arm64/fmadd_sub.c b/none/tests/arm64/fmadd_sub.c
|
|
new file mode 100644
|
|
index 000000000..dcab22d1b
|
|
--- /dev/null
|
|
+++ b/none/tests/arm64/fmadd_sub.c
|
|
@@ -0,0 +1,98 @@
|
|
+#include <math.h>
|
|
+#include <stdint.h>
|
|
+#include <stdio.h>
|
|
+#include <stdlib.h>
|
|
+
|
|
+#define COUNT 5
|
|
+
|
|
+static void /* Print |x|, labelled |ident|, as its raw 32-bit pattern in hex and as a decimal value. */
|
|
+print_float(const char *ident, float x)
|
|
+{
|
|
+ union
|
|
+ {
|
|
+ float f;
|
|
+ uint32_t i;
|
|
+ } u;
|
|
+ /* write the float member, then read the same bytes back as an integer */
|
|
+ u.f = x;
|
|
+ printf("%s = %08x = %.17g\n", ident, u.i, x);
|
|
+}
|
|
+
|
|
+static void /* Print |x|, labelled |ident|, as its raw 64-bit pattern in hex and as a decimal value. */
|
|
+print_double(const char *ident, double x)
|
|
+{
|
|
+ union
|
|
+ {
|
|
+ double f;
|
|
+ uint64_t i;
|
|
+ } u;
|
|
+ /* write the double member, then read the same bytes back as an integer */
|
|
+ u.f = x;
|
|
+ printf("%s = %016llx = %.17g\n", ident, (unsigned long long)u.i, x); /* uint64_t need not be unsigned long, so cast to match %llx */
|
|
+}
|
|
+
|
|
+int
|
|
+main(int argc, char **argv) /* argc and argv are not used */
|
|
+{
|
|
+ float x[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ; /* as a float, 0xFFFFFFFF rounds to 4294967296 */
|
|
+ float y[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF };
|
|
+ float z[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF };
|
|
+ float dst = -5;
|
|
+ /* The same input triples as doubles; here 0xFFFFFFFF is held exactly (4294967295). */
|
|
+ double dx[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ;
|
|
+ double dy[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF };
|
|
+ double dz[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF };
|
|
+ double ddst= -5;
|
|
+
|
|
+ int i;
|
|
+ /* For each input triple, run all four instructions in both widths and print the results. */
|
|
+ for (i = 0; i < COUNT; i++) {
|
|
+ // 32-bit variants: operands are passed in S registers (%sN operand modifiers)
|
|
+ asm("fmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
|
|
+ printf("FMADD 32bit: dst = z + x * y\n");
|
|
+ printf("%f = %f + %f * %f\n", dst, z[i], x[i], y[i]);
|
|
+ print_float("dst", dst);
|
|
+
|
|
+ // Floating-point negated fused multiply-add
|
|
+ asm("fnmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
|
|
+ printf("FNMADD 32bit: dst = -z + (-x) * y\n");
|
|
+ printf("%f = -%f + (-%f) * %f\n", dst, z[i], x[i], y[i]);
|
|
+ print_float("dst", dst);
|
|
+ /* Floating-point fused multiply-subtract */
|
|
+ asm("fmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
|
|
+ printf("FMSUB 32bit: dst = z + (-x) * y\n");
|
|
+ printf("%f = %f + (-%f) * %f\n", dst, z[i], x[i], y[i]);
|
|
+ print_float("dst", dst);
|
|
+ /* Floating-point negated fused multiply-subtract */
|
|
+ asm("fnmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
|
|
+ printf("FNMSUB 32bit: dst = -z + x * y\n");
|
|
+ printf("%f = -%f + %f * %f\n", dst, z[i], x[i], y[i]);
|
|
+ print_float("dst", dst);
|
|
+
|
|
+ // 64-bit variants: operands are passed in D registers (%dN operand modifiers)
|
|
+ asm("fmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i]));
|
|
+ printf("FMADD 64bit: dst = z + x * y\n");
|
|
+ printf("%f = %f + %f * %f\n", ddst, dz[i], dx[i], dy[i]);
|
|
+ print_double("dst", ddst);
|
|
+
|
|
+ asm("fnmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i]));
|
|
+ printf("FNMADD 64bit: dst = -z + (-x) * y\n");
|
|
+ printf("%f = -%f - %f * %f\n", ddst, dz[i], dx[i], dy[i]); /* text differs from the 32-bit line; the .stdout.exp file expects exactly this */
|
|
+ print_double("dst", ddst);
|
|
+
|
|
+ asm("fmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i]));
|
|
+ printf("FMSUB 64bit: dst = z + (-x) * y\n");
|
|
+ printf("%f = %f + (-%f) * %f\n", ddst, dz[i], dx[i], dy[i]);
|
|
+ print_double("dst", ddst);
|
|
+
|
|
+ asm("fnmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i]));
|
|
+ printf("FNMSUB 64bit: dst = -z + x * y\n");
|
|
+ printf("%f = -%f + %f * %f\n", ddst, dz[i], dx[i], dy[i]);
|
|
+ print_double("dst", ddst);
|
|
+
|
|
+ printf("\n"); /* blank line between input triples */
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
diff --git a/none/tests/arm64/fmadd_sub.stderr.exp b/none/tests/arm64/fmadd_sub.stderr.exp
|
|
new file mode 100644
|
|
index 000000000..e69de29bb
|
|
diff --git a/none/tests/arm64/fmadd_sub.stdout.exp b/none/tests/arm64/fmadd_sub.stdout.exp
|
|
new file mode 100644
|
|
index 000000000..f1824b12b
|
|
--- /dev/null
|
|
+++ b/none/tests/arm64/fmadd_sub.stdout.exp
|
|
@@ -0,0 +1,125 @@
|
|
+FMADD 32bit: dst = z + x * y
|
|
+76.246193 = 38.123096 + 55.000000 * 0.693147
|
|
+dst = 42987e0d = 76.246192932128906
|
|
+FNMADD 32bit: dst = -z + (-x) * y
|
|
+-76.246193 = -38.123096 + (-55.000000) * 0.693147
|
|
+dst = c2987e0d = -76.246192932128906
|
|
+FMSUB 32bit: dst = z + (-x) * y
|
|
+0.000001 = 38.123096 + (-55.000000) * 0.693147
|
|
+dst = 35c00000 = 1.430511474609375e-06
|
|
+FNMSUB 32bit: dst = -z + x * y
|
|
+-0.000001 = -38.123096 + 55.000000 * 0.693147
|
|
+dst = b5c00000 = -1.430511474609375e-06
|
|
+FMADD 64bit: dst = z + x * y
|
|
+76.246190 = 38.123095 + 55.000000 * 0.693147
|
|
+dst = 40530fc1931f09c9 = 76.246189861593976
|
|
+FNMADD 64bit: dst = -z + (-x) * y
|
|
+-76.246190 = -38.123095 - 55.000000 * 0.693147
|
|
+dst = c0530fc1931f09c9 = -76.246189861593976
|
|
+FMSUB 64bit: dst = z + (-x) * y
|
|
+-0.000000 = 38.123095 + (-55.000000) * 0.693147
|
|
+dst = bce9000000000000 = -2.7755575615628914e-15
|
|
+FNMSUB 64bit: dst = -z + x * y
|
|
+0.000000 = -38.123095 + 55.000000 * 0.693147
|
|
+dst = 3ce9000000000000 = 2.7755575615628914e-15
|
|
+
|
|
+FMADD 32bit: dst = z + x * y
|
|
+1.996086 = 1.000000 + 0.980762 * 1.015625
|
|
+dst = 3fff7fc0 = 1.9960861206054688
|
|
+FNMADD 32bit: dst = -z + (-x) * y
|
|
+-1.996086 = -1.000000 + (-0.980762) * 1.015625
|
|
+dst = bfff7fc0 = -1.9960861206054688
|
|
+FMSUB 32bit: dst = z + (-x) * y
|
|
+0.003914 = 1.000000 + (-0.980762) * 1.015625
|
|
+dst = 3b80401a = 0.00391389150172472
|
|
+FNMSUB 32bit: dst = -z + x * y
|
|
+-0.003914 = -1.000000 + 0.980762 * 1.015625
|
|
+dst = bb80401a = -0.00391389150172472
|
|
+FMADD 64bit: dst = z + x * y
|
|
+1.996086 = 1.000000 + 0.980762 * 1.015625
|
|
+dst = 3fffeff800000000 = 1.9960861206054688
|
|
+FNMADD 64bit: dst = -z + (-x) * y
|
|
+-1.996086 = -1.000000 - 0.980762 * 1.015625
|
|
+dst = bfffeff800000000 = -1.9960861206054688
|
|
+FMSUB 64bit: dst = z + (-x) * y
|
|
+0.003914 = 1.000000 + (-0.980762) * 1.015625
|
|
+dst = 3f70080000000034 = 0.0039138793945312951
|
|
+FNMSUB 64bit: dst = -z + x * y
|
|
+-0.003914 = -1.000000 + 0.980762 * 1.015625
|
|
+dst = bf70080000000034 = -0.0039138793945312951
|
|
+
|
|
+FMADD 32bit: dst = z + x * y
|
|
+0.000000 = 0.000000 + 0.000000 * 0.000000
|
|
+dst = 00000000 = 0
|
|
+FNMADD 32bit: dst = -z + (-x) * y
|
|
+-0.000000 = -0.000000 + (-0.000000) * 0.000000
|
|
+dst = 80000000 = -0
|
|
+FMSUB 32bit: dst = z + (-x) * y
|
|
+0.000000 = 0.000000 + (-0.000000) * 0.000000
|
|
+dst = 00000000 = 0
|
|
+FNMSUB 32bit: dst = -z + x * y
|
|
+0.000000 = -0.000000 + 0.000000 * 0.000000
|
|
+dst = 00000000 = 0
|
|
+FMADD 64bit: dst = z + x * y
|
|
+0.000000 = 0.000000 + 0.000000 * 0.000000
|
|
+dst = 0000000000000000 = 0
|
|
+FNMADD 64bit: dst = -z + (-x) * y
|
|
+-0.000000 = -0.000000 - 0.000000 * 0.000000
|
|
+dst = 8000000000000000 = -0
|
|
+FMSUB 64bit: dst = z + (-x) * y
|
|
+0.000000 = 0.000000 + (-0.000000) * 0.000000
|
|
+dst = 0000000000000000 = 0
|
|
+FNMSUB 64bit: dst = -z + x * y
|
|
+0.000000 = -0.000000 + 0.000000 * 0.000000
|
|
+dst = 0000000000000000 = 0
|
|
+
|
|
+FMADD 32bit: dst = z + x * y
|
|
+2.000000 = 1.000000 + 1.000000 * 1.000000
|
|
+dst = 40000000 = 2
|
|
+FNMADD 32bit: dst = -z + (-x) * y
|
|
+-2.000000 = -1.000000 + (-1.000000) * 1.000000
|
|
+dst = c0000000 = -2
|
|
+FMSUB 32bit: dst = z + (-x) * y
|
|
+0.000000 = 1.000000 + (-1.000000) * 1.000000
|
|
+dst = 00000000 = 0
|
|
+FNMSUB 32bit: dst = -z + x * y
|
|
+0.000000 = -1.000000 + 1.000000 * 1.000000
|
|
+dst = 00000000 = 0
|
|
+FMADD 64bit: dst = z + x * y
|
|
+2.000000 = 1.000000 + 1.000000 * 1.000000
|
|
+dst = 4000000000000000 = 2
|
|
+FNMADD 64bit: dst = -z + (-x) * y
|
|
+-2.000000 = -1.000000 - 1.000000 * 1.000000
|
|
+dst = c000000000000000 = -2
|
|
+FMSUB 64bit: dst = z + (-x) * y
|
|
+0.000000 = 1.000000 + (-1.000000) * 1.000000
|
|
+dst = 0000000000000000 = 0
|
|
+FNMSUB 64bit: dst = -z + x * y
|
|
+0.000000 = -1.000000 + 1.000000 * 1.000000
|
|
+dst = 0000000000000000 = 0
|
|
+
|
|
+FMADD 32bit: dst = z + x * y
|
|
+18446744073709551616.000000 = 4294967296.000000 + 4294967296.000000 * 4294967296.000000
|
|
+dst = 5f800000 = 1.8446744073709552e+19
|
|
+FNMADD 32bit: dst = -z + (-x) * y
|
|
+-18446744073709551616.000000 = -4294967296.000000 + (-4294967296.000000) * 4294967296.000000
|
|
+dst = df800000 = -1.8446744073709552e+19
|
|
+FMSUB 32bit: dst = z + (-x) * y
|
|
+-18446744073709551616.000000 = 4294967296.000000 + (-4294967296.000000) * 4294967296.000000
|
|
+dst = df800000 = -1.8446744073709552e+19
|
|
+FNMSUB 32bit: dst = -z + x * y
|
|
+18446744073709551616.000000 = -4294967296.000000 + 4294967296.000000 * 4294967296.000000
|
|
+dst = 5f800000 = 1.8446744073709552e+19
|
|
+FMADD 64bit: dst = z + x * y
|
|
+18446744069414584320.000000 = 4294967295.000000 + 4294967295.000000 * 4294967295.000000
|
|
+dst = 43efffffffe00000 = 1.8446744069414584e+19
|
|
+FNMADD 64bit: dst = -z + (-x) * y
|
|
+-18446744069414584320.000000 = -4294967295.000000 - 4294967295.000000 * 4294967295.000000
|
|
+dst = c3efffffffe00000 = -1.8446744069414584e+19
|
|
+FMSUB 64bit: dst = z + (-x) * y
|
|
+-18446744060824649728.000000 = 4294967295.000000 + (-4294967295.000000) * 4294967295.000000
|
|
+dst = c3efffffffa00000 = -1.844674406082465e+19
|
|
+FNMSUB 64bit: dst = -z + x * y
|
|
+18446744060824649728.000000 = -4294967295.000000 + 4294967295.000000 * 4294967295.000000
|
|
+dst = 43efffffffa00000 = 1.844674406082465e+19
|
|
+
|
|
diff --git a/none/tests/arm64/fmadd_sub.vgtest b/none/tests/arm64/fmadd_sub.vgtest
|
|
new file mode 100644
|
|
index 000000000..b4c53eea4
|
|
--- /dev/null
|
|
+++ b/none/tests/arm64/fmadd_sub.vgtest
|
|
@@ -0,0 +1,3 @@
|
|
+prog: fmadd_sub
|
|
+prereq: test -x fmadd_sub
|
|
+vgopts: -q
|
|
--
|
|
2.18.4
|
|
|