From 323a62b56b828d59eece2336af9ee6abf473d712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Thu, 27 Jun 2024 22:51:09 +0200 Subject: [PATCH 19/21] arm64: Fix fcvtas instruction. (cherry picked from commit de4c79ffbcd2d5e89495cee8feadf77d5f3a6ef2) --- NEWS | 1 + VEX/priv/guest_arm64_toIR.c | 74 ++++++++++++++++++++----------------- VEX/priv/host_arm64_defs.c | 23 +++++++----- VEX/priv/host_arm64_defs.h | 3 +- VEX/priv/host_arm64_isel.c | 12 +++--- 5 files changed, 65 insertions(+), 48 deletions(-) diff --git a/NEWS b/NEWS index cd89e50829bd..d1419b9d3c12 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,7 @@ The following bugs have been fixed or resolved on this branch. 487439 SIGILL in JDK11, JDK17 488379 --track-fds=yes errors that cannot be suppressed with --xml-file= 489088 Valgrind throws unhandled instruction bytes: 0xC5 0x79 0xD6 0xE0 0xC5 +489338 arm64: Instruction fcvtas should round 322.5 to 323, but result is 322. n-i-bz aarch64 frinta and frinta vector instructions To see details of a given bug, visit diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index 27d945d6328d..750383e1eb0f 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -15533,7 +15533,7 @@ Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn) } else { vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1)); switch (rm) { - case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break; + case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST_TIE_AWAY_0; break; default: vassert(0); } } @@ -15557,45 +15557,53 @@ Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn) IROp iop = iops[ix]; // A bit of ATCery: bounce all cases we haven't seen an example of. 
if (/* F32toI32S */ - (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ - || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ - || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ - || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */ + (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */ + || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */ + || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */ + || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,S */ + || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,S */ /* F32toI32U */ - || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ - || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ - || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */ - || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */ + || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */ + || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */ + || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */ + || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,S */ + || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,S */ /* F32toI64S */ - || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ - || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */ - || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */ - || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */ + || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */ + || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */ + || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */ + || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,S */ + || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,S */ /* F32toI64U */ - || (iop == Iop_F32toI64U && 
irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ - || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */ - || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */ - || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */ + || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */ + || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */ + || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */ + || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,S */ + || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,S */ /* F64toI32S */ - || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ - || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ - || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ - || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */ + || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */ + || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */ + || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */ + || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST) /* FCVTNS W,D */ + || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS W,D */ /* F64toI32U */ - || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ - || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ - || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ - || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */ + || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */ + || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */ + || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */ + || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST) /* FCVTNU W,D */ + || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU W,D */ /* F64toI64S */ - || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ - || 
(iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ - || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ - || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */ + || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */ + || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */ + || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */ + || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST) /* FCVTNS X,D */ + || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAS X,D */ /* F64toI64U */ - || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ - || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */ - || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ - || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */ + || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */ + || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */ + || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */ + || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST) /* FCVTNU X,D */ + || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST_TIE_AWAY_0)/* FCVTAU X,D */ ) { /* validated */ } else { diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 0b59c87cd3bd..dc5d198e0dc3 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -1142,13 +1142,14 @@ ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) { return i; } ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS, - UChar armRM ) { + UChar armRM, Bool tiesToAway ) { ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); i->tag = ARM64in_VCvtF2I; i->ARM64in.VCvtF2I.how = how; i->ARM64in.VCvtF2I.rD = rD; i->ARM64in.VCvtF2I.rS = rS; i->ARM64in.VCvtF2I.armRM = armRM; + i->ARM64in.VCvtF2I.tiesToAway = tiesToAway; vassert(armRM <= 3); return i; } @@ -4463,47 +4464,51 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* 
is_profInc, ---------------- 01 -------------- FCVTP-------- (round to +inf) ---------------- 10 -------------- FCVTM-------- (round to -inf) ---------------- 11 -------------- FCVTZ-------- (round to zero) + ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away) + ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away) Rd is Xd when sf==1, Wd when sf==0 Fn is Dn when x==1, Sn when x==0 20:19 carry the rounding mode, using the same encoding as FPCR + 18 when set (with 20:19 == 00), selects FCVTA{S,U}: round to nearest, ties away from zero */ UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD); UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS); ARM64CvtOp how = i->ARM64in.VCvtF2I.how; UChar armRM = i->ARM64in.VCvtF2I.armRM; + UChar bit18 = i->ARM64in.VCvtF2I.tiesToAway ? 4 : 0; /* Just handle cases as they show up. */ switch (how) { case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */ - *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */ - *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */ - *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */ - *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */ - *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */ - *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3) | bit18, 
X000000, rN, rD); break; case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */ - *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3) | bit18, X000000, rN, rD); break; case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */ - *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3), + *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3) | bit18, X000000, rN, rD); break; default: diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index e78d8045ffaa..f24a2f4ba654 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -816,6 +816,7 @@ typedef HReg rS; // src, a D or S register UChar armRM; // ARM encoded RM: // 00=nearest, 01=+inf, 10=-inf, 11=zero + Bool tiesToAway; } VCvtF2I; /* Convert between 32-bit and 64-bit FP values (both ways). (FCVT) */ struct { @@ -1062,7 +1063,7 @@ extern ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, extern ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ); extern ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ); extern ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS, - UChar armRM ); + UChar armRM, Bool tiesToAway ); extern ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ); extern ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ); extern ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 645358586f34..34c526559010 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -1921,17 +1921,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) UInt irrm = arg1con->Ico.U32; /* Find the ARM-encoded equivalent for |irrm|. 
*/ UInt armrm = 4; /* impossible */ + Bool tiesToAway = False; switch (irrm) { - case Irrm_NEAREST: armrm = 0; break; - case Irrm_NegINF: armrm = 2; break; - case Irrm_PosINF: armrm = 1; break; - case Irrm_ZERO: armrm = 3; break; + case Irrm_NEAREST: armrm = 0; break; + case Irrm_NegINF: armrm = 2; break; + case Irrm_PosINF: armrm = 1; break; + case Irrm_ZERO: armrm = 3; break; + case Irrm_NEAREST_TIE_AWAY_0: armrm = 0; tiesToAway = True; break; default: goto irreducible; } HReg src = (srcIsD ? iselDblExpr : iselFltExpr) (env, e->Iex.Binop.arg2); HReg dst = newVRegI(env); - addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm)); + addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm, tiesToAway)); return dst; } } /* local scope */ -- 2.45.2