valgrind/SOURCES/0019-arm64-Fix-fcvtas-instruction.patch

259 lines
15 KiB
Diff

From 323a62b56b828d59eece2336af9ee6abf473d712 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= <bernhardu@mailbox.org>
Date: Thu, 27 Jun 2024 22:51:09 +0200
Subject: [PATCH 19/21] arm64: Fix fcvtas instruction.
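FCVTA{S,U} round to nearest with ties away from zero, but they were
translated using Irrm_NEAREST (ties to even), so e.g. 322.5 was converted
to 322 instead of 323.  Translate them with Irrm_NEAREST_TIE_AWAY_0 and
add a tiesToAway flag to ARM64Instr_VCvtF2I so that the backend emits
FCVTA{S,U} (rounding-mode field 00 with bit 18 set).

(cherry picked from commit de4c79ffbcd2d5e89495cee8feadf77d5f3a6ef2)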
---
NEWS | 1 +
VEX/priv/guest_arm64_toIR.c | 74 ++++++++++++++++++++-----------------
VEX/priv/host_arm64_defs.c | 23 +++++++-----
VEX/priv/host_arm64_defs.h | 3 +-
VEX/priv/host_arm64_isel.c | 12 +++---
5 files changed, 65 insertions(+), 48 deletions(-)
diff --git a/NEWS b/NEWS
index cd89e50829bd..d1419b9d3c12 100644
--- a/NEWS
+++ b/NEWS
@@ -19,6 +19,7 @@ The following bugs have been fixed or resolved on this branch.
487439 SIGILL in JDK11, JDK17
488379 --track-fds=yes errors that cannot be suppressed with --xml-file=
489088 Valgrind throws unhandled instruction bytes: 0xC5 0x79 0xD6 0xE0 0xC5
+489338 arm64: Instruction fcvtas should round 322.5 to 323, but result is 322.
n-i-bz aarch64 frinta and frinta vector instructions
To see details of a given bug, visit
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 27d945d6328d..750383e1eb0f 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -15533,7 +15533,7 @@ Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
} else {
vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
switch (rm) {
- case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
+ case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST_TIE_AWAY_0; break;
default: vassert(0);
}
}
@@ -15557,45 +15557,53 @@ Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
IROp iop = iops[ix];
// A bit of ATCery: bounce all cases we haven't seen an example of.
if (/* F32toI32S */
- (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
- || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
- || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
- || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
+ (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
+ || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
+ || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
+ || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,S */
+ || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,S */
/* F32toI32U */
- || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
- || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
- || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
- || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
+ || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
+ || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
+ || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
+ || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,S */
+ || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,S */
/* F32toI64S */
- || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
- || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
- || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
- || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
+ || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
+ || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
+ || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
+ || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,S */
+ || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,S */
/* F32toI64U */
- || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
- || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
- || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
- || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
+ || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
+ || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
+ || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
+ || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,S */
+ || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,S */
/* F64toI32S */
- || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
- || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
- || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
- || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
+ || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
+ || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
+ || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
+ || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,D */
+ || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,D */
/* F64toI32U */
- || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
- || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
- || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
- || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
+ || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
+ || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
+ || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
+ || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,D */
+ || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,D */
/* F64toI64S */
- || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
- || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
- || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
- || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
+ || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
+ || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
+ || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
+ || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,D */
+ || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,D */
/* F64toI64U */
- || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
- || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
- || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
- || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
+ || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
+ || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
+ || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
+ || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,D */
+ || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,D */
) {
/* validated */
} else {
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 0b59c87cd3bd..dc5d198e0dc3 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1142,13 +1142,14 @@ ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
return i;
}
ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
- UChar armRM ) {
+ UChar armRM, Bool tiesToAway ) {
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
i->tag = ARM64in_VCvtF2I;
i->ARM64in.VCvtF2I.how = how;
i->ARM64in.VCvtF2I.rD = rD;
i->ARM64in.VCvtF2I.rS = rS;
i->ARM64in.VCvtF2I.armRM = armRM;
+ i->ARM64in.VCvtF2I.tiesToAway = tiesToAway;
vassert(armRM <= 3);
return i;
}
@@ -4463,47 +4464,51 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
---------------- 01 -------------- FCVTP-------- (round to +inf)
---------------- 10 -------------- FCVTM-------- (round to -inf)
---------------- 11 -------------- FCVTZ-------- (round to zero)
+ ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
+ ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
Rd is Xd when sf==1, Wd when sf==0
Fn is Dn when x==1, Sn when x==0
20:19 carry the rounding mode, using the same encoding as FPCR
+ 18 enable translation to FCVTA{S,U}
*/
UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
UChar armRM = i->ARM64in.VCvtF2I.armRM;
+ UChar bit18 = i->ARM64in.VCvtF2I.tiesToAway ? 4 : 0;
/* Just handle cases as they show up. */
switch (how) {
case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
- *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
- *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
- *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
- *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
- *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
- *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
- *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
- *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
+ *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3) | bit18,
X000000, rN, rD);
break;
default:
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index e78d8045ffaa..f24a2f4ba654 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -816,6 +816,7 @@ typedef
HReg rS; // src, a D or S register
UChar armRM; // ARM encoded RM:
// 00=nearest, 01=+inf, 10=-inf, 11=zero
+ Bool tiesToAway;
} VCvtF2I;
/* Convert between 32-bit and 64-bit FP values (both ways). (FCVT) */
struct {
@@ -1062,7 +1063,7 @@ extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN,
extern ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN );
extern ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS );
extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
- UChar armRM );
+ UChar armRM, Bool tiesToAway );
extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src );
extern ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src );
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 645358586f34..34c526559010 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -1921,17 +1921,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
UInt irrm = arg1con->Ico.U32;
/* Find the ARM-encoded equivalent for |irrm|. */
UInt armrm = 4; /* impossible */
+ Bool tiesToAway = False;
switch (irrm) {
- case Irrm_NEAREST: armrm = 0; break;
- case Irrm_NegINF: armrm = 2; break;
- case Irrm_PosINF: armrm = 1; break;
- case Irrm_ZERO: armrm = 3; break;
+ case Irrm_NEAREST: armrm = 0; break;
+ case Irrm_NegINF: armrm = 2; break;
+ case Irrm_PosINF: armrm = 1; break;
+ case Irrm_ZERO: armrm = 3; break;
+ case Irrm_NEAREST_TIE_AWAY_0: armrm = 0; tiesToAway = True; break;
default: goto irreducible;
}
HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
(env, e->Iex.Binop.arg2);
HReg dst = newVRegI(env);
- addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
+ addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm, tiesToAway));
return dst;
}
} /* local scope */
--
2.45.2