From d573af02098d17f6b16584a6b20ed4aedd190a33 Mon Sep 17 00:00:00 2001 From: Vadim Barkov Date: Fri, 5 Oct 2018 13:51:49 +0300 Subject: [PATCH 2/2] Vector floating point implementation (code) --- VEX/priv/guest_s390_defs.h | 10 +- VEX/priv/guest_s390_helpers.c | 47 ++ VEX/priv/guest_s390_toIR.c | 797 ++++++++++++++++++++++++++++++++-- VEX/priv/host_s390_defs.c | 225 +++++++++- VEX/priv/host_s390_defs.h | 16 +- VEX/priv/host_s390_isel.c | 112 ++++- 6 files changed, 1168 insertions(+), 39 deletions(-) diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h index 3bfecbe31..d72cc9f6d 100644 --- a/VEX/priv/guest_s390_defs.h +++ b/VEX/priv/guest_s390_defs.h @@ -281,7 +281,11 @@ enum { S390_VEC_OP_VMALH = 13, S390_VEC_OP_VCH = 14, S390_VEC_OP_VCHL = 15, - S390_VEC_OP_LAST = 16 // supposed to be the last element in enum + S390_VEC_OP_VFCE = 16, + S390_VEC_OP_VFCH = 17, + S390_VEC_OP_VFCHE = 18, + S390_VEC_OP_VFTCI = 19, + S390_VEC_OP_LAST = 20 // supposed to be the last element in enum } s390x_vec_op_t; /* Arguments of s390x_dirtyhelper_vec_op(...) 
which are packed into one @@ -300,8 +304,10 @@ typedef union { unsigned int m4 : 4; // field m4 of insn or zero if it's missing unsigned int m5 : 4; // field m5 of insn or zero if it's missing + unsigned int m6 : 4; // field m6 of insn or zero if it's missing + unsigned int i3 : 12; // field i3 of insn or zero if it's missing unsigned int read_only: 1; // don't write result to Guest State - unsigned int reserved : 27; // reserved for future + unsigned int reserved : 11; // reserved for future }; ULong serialized; } s390x_vec_op_details_t; diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index d9773e73e..5e5565682 100644 --- a/VEX/priv/guest_s390_helpers.c +++ b/VEX/priv/guest_s390_helpers.c @@ -2498,6 +2498,10 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, {0xe7, 0xa9}, /* VMALH */ {0xe7, 0xfb}, /* VCH */ {0xe7, 0xf9}, /* VCHL */ + {0xe7, 0xe8}, /* VFCE */ + {0xe7, 0xeb}, /* VFCH */ + {0xe7, 0xea}, /* VFCHE */ + {0xe7, 0x4a} /* VFTCI */ }; union { @@ -2525,6 +2529,28 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, unsigned int rxb : 4; unsigned int op2 : 8; } VRRd; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int : 4; + unsigned int m6 : 4; + unsigned int m5 : 4; + unsigned int m4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRc; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int i3 : 12; + unsigned int m5 : 4; + unsigned int m4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRIe; UChar bytes[6]; } the_insn; @@ -2578,6 +2604,27 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, the_insn.VRRd.m6 = d->m5; break; + case S390_VEC_OP_VFCE: + case S390_VEC_OP_VFCH: + case S390_VEC_OP_VFCHE: + the_insn.VRRc.v1 = 1; + the_insn.VRRc.v2 = 2; + the_insn.VRRc.v3 = 3; + the_insn.VRRc.rxb = 0b1110; + the_insn.VRRc.m4 = d->m4; + the_insn.VRRc.m5 = d->m5; + the_insn.VRRc.m6 = d->m6; 
+ break; + + case S390_VEC_OP_VFTCI: + the_insn.VRIe.v1 = 1; + the_insn.VRIe.v2 = 2; + the_insn.VRIe.rxb = 0b1100; + the_insn.VRIe.i3 = d->i3; + the_insn.VRIe.m4 = d->m4; + the_insn.VRIe.m5 = d->m5; + break; + default: vex_printf("operation = %d\n", d->op); vpanic("s390x_dirtyhelper_vec_op: unknown operation"); diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index c594ad51b..fd80cd747 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -1770,6 +1770,11 @@ s390_vr_get_type(const UChar m) /* Determine if Zero Search (ZS) flag is set in m field */ #define s390_vr_is_zs_set(m) (((m) & 0b0010) != 0) +/* Check if "Single-Element-Control" bit is set. + Used in vector FP instructions. + */ +#define s390_vr_is_single_element_control_set(m) (((m) & 0x8) != 0) + /* Generates arg1 < arg2 (or arg1 <= arg2 if allow_equal == True) expression. Arguments must have V128 type and are treated as unsigned 128-bit numbers. */ @@ -2001,12 +2006,14 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) return vr_offset(archreg) + sizeof(UShort) * index; case Ity_I32: + case Ity_F32: if(index > 3) { goto invalidIndex; } return vr_offset(archreg) + sizeof(UInt) * index; case Ity_I64: + case Ity_F64: if(index > 1) { goto invalidIndex; } @@ -2237,8 +2244,8 @@ encode_bfp_rounding_mode(UChar mode) case S390_BFP_ROUND_PER_FPC: rm = get_bfp_rounding_mode_from_fpc(); break; - case S390_BFP_ROUND_NEAREST_AWAY: /* not supported */ - case S390_BFP_ROUND_PREPARE_SHORT: /* not supported */ + case S390_BFP_ROUND_NEAREST_AWAY: rm = mkU32(Irrm_NEAREST_TIE_AWAY_0); break; + case S390_BFP_ROUND_PREPARE_SHORT: rm = mkU32(Irrm_PREPARE_SHORTER); break; case S390_BFP_ROUND_NEAREST_EVEN: rm = mkU32(Irrm_NEAREST); break; case S390_BFP_ROUND_ZERO: rm = mkU32(Irrm_ZERO); break; case S390_BFP_ROUND_POSINF: rm = mkU32(Irrm_PosINF); break; @@ -3524,6 +3531,24 @@ s390_format_VRI_VVIM(const HChar *(*irgen)(UChar v1, UChar v3, UShort i2, UChar s390_disasm(ENC5(MNM, 
VR, VR, UINT, UINT), mnm, v1, v3, i2, m4); } +static void +s390_format_VRI_VVIMM(const HChar *(*irgen)(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5), + UChar v1, UChar v2, UShort i3, UChar m4, UChar m5, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, i3, m4, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, i3, m4, m5); +} static void s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, @@ -3680,7 +3705,7 @@ s390_format_VRV_VVRDMT(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar m3) static void -s390_format_VRRd_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, +s390_format_VRR_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6), UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6, UChar rxb) @@ -3794,6 +3819,84 @@ s390_format_VRRd_VVVVM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, } +static void +s390_format_VRRa_VVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5), + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, m3, m4, m5); +} + +static void +s390_format_VRRa_VVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 
2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), mnm, v1, v2, v3, m4, m5); +} + +static void +s390_format_VRRa_VVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3, UChar m4), + UChar v1, UChar v2, UChar m3, UChar m4, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + mnm = irgen(v1, v2, m3, m4); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v2, m3, m4); +} + +static void +s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6), + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, UChar rxb) +{ + const HChar *mnm; + + if (!s390_host_has_vx) { + emulation_failure(EmFail_S390X_vx); + return; + } + + v1 = s390_vr_getVRindex(v1, 1, rxb); + v2 = s390_vr_getVRindex(v2, 2, rxb); + v3 = s390_vr_getVRindex(v3, 3, rxb); + mnm = irgen(v1, v2, v3, m4, m5, m6); + + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), mnm, v1, v2, v3, m4, m5, m6); +} + /*------------------------------------------------------------*/ /*--- Build IR for opcodes ---*/ /*------------------------------------------------------------*/ @@ -17900,6 +18003,548 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) return "vmalh"; } +static void +s390_vector_fp_from_or_to_operation(IROp op, IRType fromType, IRType toType, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + const Bool isSingleElementOperation = s390_vr_is_single_element_control_set(m4); + UChar maxIndex = (UNLIKELY(isSingleElementOperation)) ? 
0 : 1; + + /* for Iop_F32toF64 we do this: + f32[0] -> f64[0] + f32[2] -> f64[1] + + for Iop_F64toF32 we do this: + f64[0] -> f32[0] + f64[1] -> f32[2] + + The scaling factors below are used to achieve the index mapping described above. + */ + const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; + const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1; + + const Bool isUnary = (op == Iop_F32toF64); + for (UChar i = 0; i <= maxIndex; i++) { + IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); + IRExpr* result; + if (LIKELY(!isUnary)) { + result = binop(op, + mkexpr(encode_bfp_rounding_mode(m5)), + argument); + } else { + result = unop(op, argument); + } + put_vr(v1, toType, i * destinationIndexScaleFactor, result); + } + + if (UNLIKELY(isSingleElementOperation)) { + put_vr_dw1(v1, mkU64(0)); + } +} + +static const HChar * +s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdg"; +} + +static const HChar * +s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); + + return "vcdlg"; +} + +static const HChar * +s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vcgd"; +} + 
+static const HChar * +s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); + + return "vclgd"; +} + +static const HChar * +s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + emulation_warning(EmWarn_S390X_fpext_rounding); + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_RoundF64toInt, Ity_F64, Ity_F64, v1, v2, m3, m4, m5); + + return "vfi"; +} + +static const HChar * +s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 2); + + s390_vector_fp_from_or_to_operation(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); + + return "vlde"; +} + +static const HChar * +s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { + m5 = S390_BFP_ROUND_PER_FPC; + } + + s390_vector_fp_from_or_to_operation(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); + + return "vled"; +} + +static const HChar * +s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + vassert(m3 == 3); + + IRExpr* result; + switch (m5) { + case 0: { + /* Invert sign */ + if (LIKELY(!s390_vr_is_single_element_control_set(m4))) { + result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), mkU64(0)); + } + break; + } + + case 1: { + /* Set sign to negative */ + IRExpr* highHalf = mkU64(0x8000000000000000ULL); + if (LIKELY(!s390_vr_is_single_element_control_set(m4))) { + IRExpr* lowHalf = highHalf; + IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); + 
result = binop(Iop_OrV128, get_vr_qw(v2), mask); + } + else { + result = binop(Iop_64HLtoV128, binop(Iop_Or64, get_vr_dw0(v2), highHalf), mkU64(0ULL)); + } + + break; + } + + case 2: { + /* Set sign to positive */ + if (LIKELY(!s390_vr_is_single_element_control_set(m4))) { + result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); + } + else { + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), mkU64(0)); + } + + break; + } + + default: + vpanic("s390_irgen_VFPSO: Invalid m5 value"); + } + + put_vr_qw(v1, result); + if(UNLIKELY(s390_vr_is_single_element_control_set(m4))) { + put_vr_dw1(v1, mkU64(0ULL)); + } + + return "vfpso"; +} + +static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + IRExpr* result; + if (LIKELY(!s390_vr_is_single_element_control_set(m5))) { + result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), get_vr_qw(v2), get_vr_qw(v3)); + } else { + IRExpr* highHalf = triop(singleElementOp, get_bfp_rounding_mode_from_fpc(), get_vr(v2, Ity_F64, 0), get_vr(v3, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + +static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, + UChar v1, UChar v2, UChar m3, UChar m4) +{ + IRExpr* result; + if (LIKELY(!s390_vr_is_single_element_control_set(m4))) { + result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), get_vr_qw(v2)); + } + else { + IRExpr* highHalf = binop(singleElementOp, get_bfp_rounding_mode_from_fpc(), get_vr(v2, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + + +static void +s390_vector_fp_mulAddOrSub_operation(IROp addOrSub, IROp singleElementOp, + UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + const Bool isSingleElementOperation = s390_vr_is_single_element_control_set(m5); + + IRTemp 
irrm_temp = newTemp(Ity_I32); + assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); + IRExpr* irrm = mkexpr(irrm_temp); + + IRExpr* result; + if (LIKELY(!isSingleElementOperation)) { + IRExpr* mulResult = triop(Iop_Mul64Fx2, + irrm, + get_vr_qw(v2), + get_vr_qw(v3)); + result = triop(addOrSub, + irrm, + mulResult, + get_vr_qw(v4)); + + } else { + IRExpr* highHalf = qop(singleElementOp, + irrm, + get_vr(v2, Ity_F64, 0), + get_vr(v3, Ity_F64, 0), + get_vr(v4, Ity_F64, 0)); + result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), mkU64(0ULL)); + } + + put_vr_qw(v1, result); +} + +static const HChar * +s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + vassert(m4 == 3); + s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); + return "vfa"; +} + +static const HChar * +s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + vassert(m4 == 3); + s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); + return "vfs"; +} + +static const HChar * +s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + vassert(m4 == 3); + s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5); + return "vfm"; +} + +static const HChar * +s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + vassert(m4 == 3); + s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); + return "vfd"; +} + +static const HChar * +s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) +{ + vassert(m3 == 3); + s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); + + return "vfsq"; +} + +static const HChar * +s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + vassert(m6 == 3); + s390_vector_fp_mulAddOrSub_operation(Iop_Add64Fx2, Iop_MAddF64, v1, v2, v3, v4, m5, m6); + return "vfma"; +} + +static const HChar * +s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + vassert(m6 == 3); + 
s390_vector_fp_mulAddOrSub_operation(Iop_Sub64Fx2, Iop_MSubF64, v1, v2, v3, v4, m5, m6); + return "vfms"; +} + +static const HChar * +s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) +{ + vassert(m3 == 3); + vassert(m4 == 0); + + IRTemp cc_vex = newTemp(Ity_I32); + assign(cc_vex, binop(Iop_CmpF64, get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); + + IRTemp cc_s390 = newTemp(Ity_I32); + assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); + s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False); + + return "wfc"; +} + +static const HChar * +s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) +{ + s390_irgen_WFC(v1, v2, m3, m4); + + return "wfk"; +} + +static const HChar * +s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (LIKELY(!isSingleElementOp)) { + put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); + } else { + IRExpr* comparationResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparationResult, mkU32(Ircr_EQ)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar sizeOfElement = (!isSingleElementOp) ? 
sizeof(V128) : sizeof(ULong); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeOfElement; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeOfElement; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfce"; +} + +static const HChar * +s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (LIKELY(!isSingleElementOp)) { + put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } else { + IRExpr* comparationResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), get_vr(v3, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparationResult, mkU32(Ircr_GT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCH; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar sizeOfElement = (!isSingleElementOp) ? 
sizeof(V128) : sizeof(ULong); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeOfElement; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeOfElement; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfch"; +} + +static const HChar * +s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + if (!s390_vr_is_cs_set(m6)) { + if (LIKELY(!isSingleElementOp)) { + put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); + } + else { + IRExpr* comparationResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), get_vr(v2, Ity_F64, 0)); + IRExpr* result = mkite(binop(Iop_CmpEQ32, comparationResult, mkU32(Ircr_LT)), + mkU64(0xffffffffffffffffULL), + mkU64(0ULL)); + put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); + } + } + else { + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFCHE; + details.v1 = v1; + details.v2 = v2; + details.v3 = v3; + details.m4 = m4; + details.m5 = m5; + details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar sizeOfElement = (!isSingleElementOp) ? 
sizeof(V128) : sizeof(ULong); + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeOfElement; + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); + d->fxState[1].size = sizeOfElement; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + } + + return "vfche"; +} + +static const HChar * +s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) +{ + vassert(m4 == 3); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); + + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; + details.op = S390_VEC_OP_VFTCI; + details.v1 = v1; + details.v2 = v2; + details.i3 = i3; + details.m4 = m4; + details.m5 = m5; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + + const UChar sizeOfElement = (!isSingleElementOp) ? sizeof(V128) : sizeof(ULong); + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = sizeOfElement; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); + d->fxState[1].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); + + return "vftci"; +} + /* New insns are added here. 
If an insn is contingent on a facility being installed also check whether the list of supported facilities in function @@ -19362,6 +20007,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRR; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int : 4; + unsigned int m5 : 4; + unsigned int m4 : 4; + unsigned int m3 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRa; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19374,6 +20031,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRRd; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int v3 : 4; + unsigned int m6 : 4; + unsigned int : 4; + unsigned int m5 : 4; + unsigned int v4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRRe; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19394,6 +20063,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) unsigned int rxb : 4; unsigned int op2 : 8; } VRId; + struct { + unsigned int op1 : 8; + unsigned int v1 : 4; + unsigned int v2 : 4; + unsigned int i3 : 12; + unsigned int m5 : 4; + unsigned int m4 : 4; + unsigned int rxb : 4; + unsigned int op2 : 8; + } VRIe; struct { unsigned int op1 : 8; unsigned int v1 : 4; @@ -19979,7 +20658,10 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe70000000046ULL: s390_format_VRI_VIM(s390_irgen_VGM, ovl.fmt.VRI.v1, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; - case 0xe7000000004aULL: /* VFTCI */ goto unimplemented; + case 0xe7000000004aULL: s390_format_VRI_VVIMM(s390_irgen_VFTCI, ovl.fmt.VRIe.v1, + ovl.fmt.VRIe.v2, ovl.fmt.VRIe.i3, + ovl.fmt.VRIe.m4, ovl.fmt.VRIe.m5, + ovl.fmt.VRIe.rxb); goto ok; case 0xe7000000004dULL: s390_format_VRI_VVIM(s390_irgen_VREP, ovl.fmt.VRI.v1, ovl.fmt.VRI.v3, ovl.fmt.VRI.i2, ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb); goto ok; @@ -20092,19 +20774,25 @@ 
s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; - case 0xe7000000008aULL: s390_format_VRRd_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, - ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, - ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, - ovl.fmt.VRRd.m6, - ovl.fmt.VRRd.rxb); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1, + ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, + ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, + ovl.fmt.VRRd.m6, + ovl.fmt.VRRd.rxb); goto ok; case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; case 0xe7000000008dULL: s390_format_VRR_VVVV(s390_irgen_VSEL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe7000000008eULL: /* VFMS */ goto unimplemented; - case 0xe7000000008fULL: /* VFMA */ goto unimplemented; + case 0xe7000000008eULL: s390_format_VRR_VVVVMM(s390_irgen_VFMS, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, ovl.fmt.VRRe.rxb); goto ok; + case 0xe7000000008fULL: s390_format_VRR_VVVVMM(s390_irgen_VFMA, ovl.fmt.VRRe.v1, + ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3, + ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5, + ovl.fmt.VRRe.m6, ovl.fmt.VRRe.rxb); goto ok; case 0xe70000000094ULL: s390_format_VRR_VVVM(s390_irgen_VPK, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.r3, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20189,17 +20877,47 @@ s390_decode_6byte_and_irgen(const UChar *bytes) ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3, ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5, ovl.fmt.VRRd.rxb); goto ok; - case 0xe700000000c0ULL: /* VCLGD */ goto unimplemented; - case 0xe700000000c1ULL: /* VCDLG */ goto unimplemented; - case 0xe700000000c2ULL: /* VCGD */ goto unimplemented; - case 0xe700000000c3ULL: /* VCDG */ goto unimplemented; - case 0xe700000000c4ULL: /* 
VLDE */ goto unimplemented; - case 0xe700000000c5ULL: /* VLED */ goto unimplemented; - case 0xe700000000c7ULL: /* VFI */ goto unimplemented; - case 0xe700000000caULL: /* WFK */ goto unimplemented; - case 0xe700000000cbULL: /* WFC */ goto unimplemented; - case 0xe700000000ccULL: /* VFPSO */ goto unimplemented; - case 0xe700000000ceULL: /* VFSQ */ goto unimplemented; + case 0xe700000000c0ULL: s390_format_VRRa_VVMMM(s390_irgen_VCLGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c1ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDLG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c2ULL: s390_format_VRRa_VVMMM(s390_irgen_VCGD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c3ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDG, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000c7ULL: s390_format_VRRa_VVMMM(s390_irgen_VFI, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000caULL: s390_format_VRRa_VVMM(s390_irgen_WFK, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000cbULL: s390_format_VRRa_VVMM(s390_irgen_WFC, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.rxb); goto ok; + case 
0xe700000000ccULL: s390_format_VRRa_VVMMM(s390_irgen_VFPSO, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ceULL: s390_format_VRRa_VVMM(s390_irgen_VFSQ, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3, + ovl.fmt.VRRa.m4, ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000d4ULL: s390_format_VRR_VVM(s390_irgen_VUPLL, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; @@ -20226,13 +20944,34 @@ s390_decode_6byte_and_irgen(const UChar *bytes) case 0xe700000000dfULL: s390_format_VRR_VVM(s390_irgen_VLP, ovl.fmt.VRR.v1, ovl.fmt.VRR.v2, ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb); goto ok; - case 0xe700000000e2ULL: /* VFS */ goto unimplemented; - case 0xe700000000e3ULL: /* VFA */ goto unimplemented; - case 0xe700000000e5ULL: /* VFD */ goto unimplemented; - case 0xe700000000e7ULL: /* VFM */ goto unimplemented; - case 0xe700000000e8ULL: /* VFCE */ goto unimplemented; - case 0xe700000000eaULL: /* VFCHE */ goto unimplemented; - case 0xe700000000ebULL: /* VFCH */ goto unimplemented; + case 0xe700000000e2ULL: s390_format_VRRa_VVVMM(s390_irgen_VFS, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e3ULL: s390_format_VRRa_VVVMM(s390_irgen_VFA, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e5ULL: s390_format_VRRa_VVVMM(s390_irgen_VFD, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e7ULL: s390_format_VRRa_VVVMM(s390_irgen_VFM, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000e8ULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, 
ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000eaULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCHE, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, ovl.fmt.VRRa.rxb); goto ok; + case 0xe700000000ebULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCH, ovl.fmt.VRRa.v1, + ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3, + ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4, + ovl.fmt.VRRa.m5, ovl.fmt.VRRa.rxb); goto ok; case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; case 0xe700000000efULL: /* VFMAX */ goto unimplemented; case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, ovl.fmt.VRR.v1, diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 6c22ac843..666d8a48f 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -1714,6 +1714,22 @@ emit_VRR_VVM(UChar *p, ULong op, UChar v1, UChar v2, UChar m4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)m5) << 20; + the_insn |= ((ULong)m4) << 16; + the_insn |= ((ULong)m3) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VVVM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4) @@ -1765,6 +1781,25 @@ emit_VRR_VVVV(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRRe_VVVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + rxb = s390_update_rxb(rxb, 4, &v4); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= 
((ULong)m6) << 24; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)v4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} static UChar * emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) @@ -1780,6 +1815,31 @@ emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3) return emit_6bytes(p, the_insn); } +static UChar * +emit_VRR_VVVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); + rxb = s390_update_rxb(rxb, 2, &v2); + rxb = s390_update_rxb(rxb, 3, &v3); + + the_insn |= ((ULong)v1) << 36; + the_insn |= ((ULong)v2) << 32; + the_insn |= ((ULong)v3) << 28; + the_insn |= ((ULong)m6) << 20; + the_insn |= ((ULong)m5) << 16; + the_insn |= ((ULong)m4) << 12; + the_insn |= ((ULong)rxb) << 8; + + return emit_6bytes(p, the_insn); +} + +static UChar* +emit_VRR_VVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + return emit_VRR_VVVMMM(p, op, v1, v2, v3, m4, m5, 0); +} + /*------------------------------------------------------------*/ /*--- Functions to emit particular instructions ---*/ /*------------------------------------------------------------*/ @@ -6060,6 +6120,105 @@ s390_emit_VLVGP(UChar *p, UChar v1, UChar r2, UChar r3) return emit_VRR_VRR(p, 0xE70000000062ULL, v1, r2, r3); } +static UChar * +s390_emit_VFPSO(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), "vfpso", v1, v2, m3, m4, m5); + + return emit_VRR_VVMMM(p, 0xE700000000CCULL, v1, v2, m3, m4, m5); +} + +static UChar * +s390_emit_VFA(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfa", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e3ULL, v1, v2, v3, m4, m5); +} + +static UChar * 
+s390_emit_VFS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfs", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e2ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFM(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfm", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e7ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFD(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfd", v1, v2, v3, m4, m5); + + return emit_VRR_VVVMM(p, 0xE700000000e5ULL, v1, v2, v3, m4, m5); +} + +static UChar * +s390_emit_VFSQ(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vfsq", v1, v2, m3, m4); + + return emit_VRR_VVMMM(p, 0xE700000000CEULL, v1, v2, m3, m4, 0); +} + +static UChar * +s390_emit_VFMA(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfma", v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008fULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFMS(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfms", v1, v2, v3, v4, m5, m6); + + return emit_VRRe_VVVVMM(p, 0xE7000000008eULL, v1, v2, v3, v4, m5, m6); +} + +static UChar * +s390_emit_VFCE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfce", v1, 
v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000e8ULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfch", v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000ebULL, v1, v2, v3, m4, m5, m6); +} + +static UChar * +s390_emit_VFCHE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) +{ + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfche", v1, v2, v3, m4, m5, m6); + + return emit_VRR_VVVMMM(p, 0xE700000000eaULL, v1, v2, v3, m4, m5, m6); +} + /*---------------------------------------------------------------*/ /*--- Constructors for the various s390_insn kinds ---*/ /*---------------------------------------------------------------*/ @@ -7204,7 +7363,6 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, { s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); - vassert(size == 16); insn->tag = S390_INSN_VEC_TRIOP; insn->size = size; @@ -7511,6 +7669,18 @@ s390_insn_as_string(const s390_insn *insn) op = "v-vunpacku"; break; + case S390_VEC_FLOAT_NEG: + op = "v-vfloatneg"; + break; + + case S390_VEC_FLOAT_SQRT: + op = "v-vfloatsqrt"; + break; + + case S390_VEC_FLOAT_ABS: + op = "v-vfloatabs"; + break; + default: goto fail; } @@ -7883,6 +8053,13 @@ s390_insn_as_string(const s390_insn *insn) case S390_VEC_PWSUM_DW: op = "v-vpwsumdw"; break; case S390_VEC_PWSUM_QW: op = "v-vpwsumqw"; break; case S390_VEC_INIT_FROM_GPRS: op = "v-vinitfromgprs"; break; + case S390_VEC_FLOAT_ADD: op = "v-vfloatadd"; break; + case S390_VEC_FLOAT_SUB: op = "v-vfloatsub"; break; + case S390_VEC_FLOAT_MUL: op = "v-vfloatmul"; break; + case S390_VEC_FLOAT_DIV: op = "v-vfloatdiv"; break; + case S390_VEC_FLOAT_COMPARE_EQUAL: op = "v-vfloatcmpeq"; break; + case
S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: op = "v-vfloatcmple"; break; + case S390_VEC_FLOAT_COMPARE_LESS: op = "v-vfloatcmpl"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R", op, insn->variant.vec_binop.dst, @@ -7892,6 +8069,8 @@ s390_insn_as_string(const s390_insn *insn) case S390_INSN_VEC_TRIOP: switch (insn->variant.vec_triop.tag) { case S390_VEC_PERM: op = "v-vperm"; break; + case S390_VEC_FLOAT_MADD: op = "v-vfloatmadd"; break; + case S390_VEC_FLOAT_MSUB: op = "v-vfloatmsub"; break; default: goto fail; } s390_sprintf(buf, "%M %R, %R, %R, %R", op, insn->variant.vec_triop.dst, @@ -9039,6 +9218,27 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VPOPCT(buf, v1, v2, s390_getM_from_size(insn->size)); } + case S390_VEC_FLOAT_NEG: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0); + } + case S390_VEC_FLOAT_ABS: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2); + } + case S390_VEC_FLOAT_SQRT: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size == 8); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0); + } default: vpanic("s390_insn_unop_emit"); } @@ -11052,6 +11252,21 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size)); case S390_VEC_INIT_FROM_GPRS: return s390_emit_VLVGP(buf, v1, v2, v3); + case S390_VEC_FLOAT_ADD: + return s390_emit_VFA(buf, v1, v2, v3, 
s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_SUB: + return s390_emit_VFS(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_MUL: + return s390_emit_VFM(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_DIV: + return s390_emit_VFD(buf, v1, v2, v3, s390_getM_from_size(size), 0); + case S390_VEC_FLOAT_COMPARE_EQUAL: + return s390_emit_VFCE(buf, v1, v2, v3, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL: /* a <= b is emitted as b >= a: operands swapped, compare high-or-equal */ + return s390_emit_VFCHE(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + case S390_VEC_FLOAT_COMPARE_LESS: /* a < b is emitted as b > a: operands swapped, compare high */ + return s390_emit_VFCH(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0); + default: goto fail; } @@ -11073,8 +11288,14 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) UChar v4 = hregNumber(insn->variant.vec_triop.op3); switch (tag) { - case S390_VEC_PERM: + case S390_VEC_PERM: { + vassert(insn->size == 16); return s390_emit_VPERM(buf, v1, v2, v3, v4); + } + case S390_VEC_FLOAT_MADD: + return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3); + case S390_VEC_FLOAT_MSUB: + return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3); default: goto fail; } diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 7ea01010e..40f0472a2 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -202,7 +202,10 @@ typedef enum { S390_VEC_ABS, S390_VEC_COUNT_LEADING_ZEROES, S390_VEC_COUNT_TRAILING_ZEROES, - S390_VEC_COUNT_ONES + S390_VEC_COUNT_ONES, + S390_VEC_FLOAT_NEG, + S390_VEC_FLOAT_ABS, + S390_VEC_FLOAT_SQRT } s390_unop_t; /* The kind of ternary BFP operations */ @@ -394,11 +397,20 @@ typedef enum { S390_VEC_PWSUM_QW, S390_VEC_INIT_FROM_GPRS, + S390_VEC_FLOAT_ADD, + S390_VEC_FLOAT_SUB, + S390_VEC_FLOAT_MUL, + S390_VEC_FLOAT_DIV, + S390_VEC_FLOAT_COMPARE_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL, + S390_VEC_FLOAT_COMPARE_LESS } s390_vec_binop_t; /* The vector operations with three operands */ typedef enum { - S390_VEC_PERM + S390_VEC_PERM, +
S390_VEC_FLOAT_MADD, + S390_VEC_FLOAT_MSUB } s390_vec_triop_t; /* The details of a CDAS insn. Carved out to keep the size of diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c index bc34f90ff..48cc8625c 100644 --- a/VEX/priv/host_s390_isel.c +++ b/VEX/priv/host_s390_isel.c @@ -787,10 +787,12 @@ get_bfp_rounding_mode(ISelEnv *env, IRExpr *irrm) IRRoundingMode mode = irrm->Iex.Const.con->Ico.U32; switch (mode) { - case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; - case Irrm_ZERO: return S390_BFP_ROUND_ZERO; - case Irrm_PosINF: return S390_BFP_ROUND_POSINF; - case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; + case Irrm_NEAREST_TIE_AWAY_0: return S390_BFP_ROUND_NEAREST_AWAY; + case Irrm_PREPARE_SHORTER: return S390_BFP_ROUND_PREPARE_SHORT; + case Irrm_NEAREST: return S390_BFP_ROUND_NEAREST_EVEN; + case Irrm_ZERO: return S390_BFP_ROUND_ZERO; + case Irrm_PosINF: return S390_BFP_ROUND_POSINF; + case Irrm_NegINF: return S390_BFP_ROUND_NEGINF; default: vpanic("get_bfp_rounding_mode"); } @@ -3871,6 +3873,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_COUNT_ONES; goto Iop_V_wrk; + case Iop_Neg64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_NEG; + goto Iop_V_wrk; + + case Iop_Abs64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_ABS; + goto Iop_V_wrk; + + Iop_V_wrk: { dst = newVRegV(env); reg1 = s390_isel_vec_expr(env, arg); @@ -4388,6 +4401,28 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) vec_op = S390_VEC_ELEM_ROLL_V; goto Iop_VV_wrk; + case Iop_CmpEQ64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_EQUAL; + goto Iop_VV_wrk; + + case Iop_CmpLE64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; + goto Iop_VV_wrk; + } + + case Iop_CmpLT64Fx2: { + size = 8; + vec_op = S390_VEC_FLOAT_COMPARE_LESS; + goto Iop_VV_wrk; + } + + case Iop_Sqrt64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_SQRT; + goto Iop_irrm_V_wrk; + case Iop_ShlN8x16: size = 1; shift_op = S390_VEC_ELEM_SHL_INT; @@ -4493,6 +4528,14 @@ 
s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) return dst; } + Iop_irrm_V_wrk: { + set_bfp_rounding_mode_in_fpc(env, arg1); + reg1 = s390_isel_vec_expr(env, arg2); + + addInstr(env, s390_insn_unop(size, vec_op, dst, s390_opnd_reg(reg1))); + return dst; + } + case Iop_64HLtoV128: reg1 = s390_isel_int_expr(env, arg1); reg2 = s390_isel_int_expr(env, arg2); @@ -4516,6 +4559,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) IRExpr* arg1 = expr->Iex.Triop.details->arg1; IRExpr* arg2 = expr->Iex.Triop.details->arg2; IRExpr* arg3 = expr->Iex.Triop.details->arg3; + IROp vec_op; switch (op) { case Iop_SetElem8x16: size = 1; @@ -4551,6 +4595,66 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) dst, reg1, reg2, reg3)); return dst; + case Iop_Add64Fx2: + size = 8; + + /* Add64Fx2(Mul64Fx2(arg1, arg2), arg3) -> MAdd(arg1, arg2, arg3) */ + if (UNLIKELY((arg2->tag == Iex_Triop) + && (arg2->Iex.Triop.details->op == Iop_Mul64Fx2) + && (arg1 == arg2->Iex.Triop.details->arg1)) + ) { + vec_op = S390_VEC_FLOAT_MADD; + goto Iop_irrm_MAddOrSub; + } + + vec_op = S390_VEC_FLOAT_ADD; + goto Iop_irrm_VV_wrk; + + case Iop_Sub64Fx2: + size = 8; + + /* Sub64Fx2(Mul64Fx2(arg1, arg2), arg3) -> MSub(arg1, arg2, arg3) */ + if (UNLIKELY((arg2->tag == Iex_Triop) + && (arg2->Iex.Triop.details->op == Iop_Mul64Fx2) + && (arg1 == arg2->Iex.Triop.details->arg1)) + ) { + vec_op = S390_VEC_FLOAT_MSUB; + goto Iop_irrm_MAddOrSub; + } + + vec_op = S390_VEC_FLOAT_SUB; + goto Iop_irrm_VV_wrk; + + case Iop_Mul64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_MUL; + goto Iop_irrm_VV_wrk; + case Iop_Div64Fx2: + size = 8; + vec_op = S390_VEC_FLOAT_DIV; + goto Iop_irrm_VV_wrk; + + Iop_irrm_VV_wrk: { + set_bfp_rounding_mode_in_fpc(env, arg1); + reg1 = s390_isel_vec_expr(env, arg2); + reg2 = s390_isel_vec_expr(env, arg3); + + addInstr(env, s390_insn_vec_binop(size, vec_op, + dst, reg1, reg2)); + + return dst; + } + + Iop_irrm_MAddOrSub: { + reg1 = s390_isel_vec_expr(env, arg2->Iex.Triop.details->arg2); + reg2 = 
s390_isel_vec_expr(env, arg2->Iex.Triop.details->arg3); + + set_bfp_rounding_mode_in_fpc(env, arg1); + addInstr(env, + s390_insn_vec_triop(size, vec_op, dst, reg1, reg2, s390_isel_vec_expr(env, arg3))); + return dst; + } + default: goto irreducible; } -- 2.19.0.windows.1