From 4a7f0982142414783303455781dbb51872521b00 Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Thu, 3 Jun 2021 14:58:26 +0200 Subject: [PATCH] 3.17.0-6 - z15 support Add valgrind-3.17.0-s390-prep.patch Add valgrind-3.17.0-s390-z15.patch Add valgrind-3.17.0-s390-z13-vec-fix.patch Resolves: #1920584 Valgrind support of IBM Z hardware z15 --- valgrind-3.17.0-s390-prep.patch | 2283 ++++++++++++++++++++++ valgrind-3.17.0-s390-z13-vec-fix.patch | 46 + valgrind-3.17.0-s390-z15.patch | 2413 ++++++++++++++++++++++++ valgrind.spec | 45 +- 4 files changed, 4786 insertions(+), 1 deletion(-) create mode 100644 valgrind-3.17.0-s390-prep.patch create mode 100644 valgrind-3.17.0-s390-z13-vec-fix.patch create mode 100644 valgrind-3.17.0-s390-z15.patch diff --git a/valgrind-3.17.0-s390-prep.patch b/valgrind-3.17.0-s390-prep.patch new file mode 100644 index 0000000..8f2dbb1 --- /dev/null +++ b/valgrind-3.17.0-s390-prep.patch @@ -0,0 +1,2283 @@ +commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 +Author: Andreas Arnez +Date: Wed Apr 28 18:52:30 2021 +0200 + + Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg + + The fix for bug 429864 - "s390x: C++ atomic test_and_set yields + false-positive memcheck diagnostics" changes the memcheck behavior at + various compare-and-swap instructions. The comparison between the old and + expected value now always yields a defined result, even if the input + values are (partially) undefined. However, some existing test cases + explicitly verify that memcheck complains about the use of uninitialised + values here. These test cases are no longer valid. Remove them. 
+ +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index 67ae8c293..e4e69eb38 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe ++INSN_TESTS = cdsg cu21 cu42 ltgjhe + + check_PROGRAMS = $(INSN_TESTS) + +@@ -14,7 +14,3 @@ EXTRA_DIST = \ + AM_CFLAGS += @FLAG_M64@ + AM_CXXFLAGS += @FLAG_M64@ + AM_CCASFLAGS += @FLAG_M64@ +- +-cs_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +-csg_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +-cds_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c +deleted file mode 100644 +index ec5c533e0..000000000 +--- a/memcheck/tests/s390x/cds.c ++++ /dev/null +@@ -1,82 +0,0 @@ +-#include +-#include +- +-typedef struct { +- uint64_t high; +- uint64_t low; +-} quad_word; +- +-void +-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init) +-{ +- int cc; // unused +- quad_word op1 = op1_init; +- uint64_t op2 = op2_init; +- quad_word op3 = op3_init; +- +- __asm__ volatile ( +- "lmg %%r0,%%r1,%1\n\t" +- "lmg %%r2,%%r3,%3\n\t" +- "cds %%r0,%%r2,%2\n\t" // cds 1st,3rd,2nd +- "stmg %%r0,%%r1,%1\n" // store r0,r1 to op1 +- "stmg %%r2,%%r3,%3\n" // store r2,r3 to op3 +- : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3) +- : +- : "r0", "r1", "r2", "r3", "cc"); +- +-} +- +-// Return a quad-word that only bits low[32:63] are undefined +-quad_word +-make_undefined(void) +-{ +- quad_word val; +- +- val.high = 0; +- val.low |= 0xFFFFFFFF00000000ull; +- +- return val; +-} +- +-void op1_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; +- +- // op1 undefined +- op1 = make_undefined(); +- op2 = 42; +- op3.high = op3.low = 0xdeadbeefdeadbabeull; +- test(op1, op2, op3); // complaint +-} +- +-void op2_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; 
+- +- op1.high = op1.low = 42; +- // op2 undefined +- op3.high = op3.low = 0xdeadbeefdeadbabeull; +- test(op1, op2, op3); // complaint +-} +- +-void op3_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; +- +- op1.high = op1.low = 42; +- op2 = 100; +- op3 = make_undefined(); +- test(op1, op2, op3); // no complaint; op3 is just copied around +-} +- +-int main () +-{ +- op1_undefined(); +- op2_undefined(); +- op3_undefined(); +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp +deleted file mode 100644 +index e72de94c8..000000000 +--- a/memcheck/tests/s390x/cds.stderr.exp ++++ /dev/null +@@ -1,10 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cds.c:17) +- by 0x........: op1_undefined (cds.c:50) +- by 0x........: main (cds.c:77) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cds.c:17) +- by 0x........: op2_undefined (cds.c:61) +- by 0x........: main (cds.c:78) +- +diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest +deleted file mode 100644 +index 5195887e2..000000000 +--- a/memcheck/tests/s390x/cds.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: cds +-vgopts: -q +diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c +deleted file mode 100644 +index 9a298cef9..000000000 +--- a/memcheck/tests/s390x/cs.c ++++ /dev/null +@@ -1,32 +0,0 @@ +-#include +-#include +-#include +- +-void +-test(int32_t op1_init, int32_t op2_init, int32_t op3_init) +-{ +- register int32_t op1 asm("8") = op1_init; +- register int32_t op3 asm("9") = op3_init; +- +- int32_t op2 = op2_init; +- int cc = 1; +- +- __asm__ volatile ( +- "cs 8,9,%1\n\t" +- "ipm %0\n\t" +- "srl %0,28\n\t" +- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) +- : +- : "cc"); +-} +- +-int main () +-{ 
+- int op1, op2, op3; +- +- test(op1, 0x10000000, 0x12345678); // complaint +- test(0x10000000, op2, 0x12345678); // complaint +- test(0x10000000, 0x01000000, op3); // no complaint +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp +deleted file mode 100644 +index e45dc99cd..000000000 +--- a/memcheck/tests/s390x/cs.stderr.exp ++++ /dev/null +@@ -1,8 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cs.c:14) +- by 0x........: main (cs.c:27) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cs.c:14) +- by 0x........: main (cs.c:28) +- +diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest +deleted file mode 100644 +index 323cce80c..000000000 +--- a/memcheck/tests/s390x/cs.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: cs +-vgopts: -q +diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c +deleted file mode 100644 +index 7f9d8c88e..000000000 +--- a/memcheck/tests/s390x/csg.c ++++ /dev/null +@@ -1,32 +0,0 @@ +-#include +-#include +-#include +- +-void +-test(int64_t op1_init, int64_t op2_init, int64_t op3_init) +-{ +- register int64_t op1 asm("8") = op1_init; +- register int64_t op3 asm("9") = op3_init; +- +- int64_t op2 = op2_init; +- int cc = 1; +- +- __asm__ volatile ( +- "csg 8,9,%1\n\t" +- "ipm %0\n\t" +- "srl %0,28\n\t" +- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) +- : +- : "cc"); +-} +- +-int main () +-{ +- int64_t op1, op2, op3; +- +- test(op1, 0x1000000000000000ull, 0x1234567887654321ull); // complaint +- test(0x1000000000000000ull, op2, 0x1234567887654321ull); // complaint +- test(0x1000000000000000ull, 0x1000000000000000ull, op3); // no complaint +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/csg.stderr.exp 
b/memcheck/tests/s390x/csg.stderr.exp +deleted file mode 100644 +index fda2021ce..000000000 +--- a/memcheck/tests/s390x/csg.stderr.exp ++++ /dev/null +@@ -1,8 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (csg.c:14) +- by 0x........: main (csg.c:27) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (csg.c:14) +- by 0x........: main (csg.c:28) +- +diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest +deleted file mode 100644 +index 6de75c1d6..000000000 +--- a/memcheck/tests/s390x/csg.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: csg +-vgopts: -q + +commit 18ddcc47c951427efd3b790ba2481159b9bd1598 +Author: Andreas Arnez +Date: Wed Apr 7 16:48:29 2021 +0200 + + s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 + + Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction + selector. Handle them exactly like the "inexpensive" variants Iop_CmpNE32 + and Iop_CmpNE64. + +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 2000ec224..5f79280c0 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond) + + case Iop_CmpNE32: + case Iop_CmpNE64: ++ case Iop_ExpCmpNE32: ++ case Iop_ExpCmpNE64: + case Iop_CasCmpNE32: + case Iop_CasCmpNE64: + result = S390_CC_NE; + +commit 5db3f929c43bf46f4707178706cfe90f43acdd19 +Author: Andreas Arnez +Date: Wed Apr 7 12:30:20 2021 +0200 + + s390x: Add convenience function mkV128() + + Provide mkV128() as a short-hand notation for creating a vector constant from + a bit pattern, similar to other such functions like mkU64(). 
+ +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 339377007..7d54cb551 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -376,6 +376,13 @@ mkU64(ULong value) + return IRExpr_Const(IRConst_U64(value)); + } + ++/* Create an expression node for a 128-bit vector constant */ ++static __inline__ IRExpr * ++mkV128(UShort value) ++{ ++ return IRExpr_Const(IRConst_V128(value)); ++} ++ + /* Create an expression node for a 32-bit floating point constant + whose value is given by a bit pattern. */ + static __inline__ IRExpr * +@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4) + static const HChar * + s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused))) + { +- put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2))); ++ put_vr_qw(v1, mkV128(i2)); + + return "vgbm"; + } +@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + case Ity_I8: + sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0001000100010001)); ++ mask = mkV128(0b0001000100010001); + break; + case Ity_I16: + sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0011001100110011)); ++ mask = mkV128(0b0011001100110011); + break; + default: + vpanic("s390_irgen_VSUM: invalid type "); +@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + case Ity_I16: + sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0000001100000011)); ++ mask = mkV128(0b0000001100000011); + break; + case Ity_I32: + sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0000111100001111)); ++ mask = mkV128(0b0000111100001111); + break; + default: + vpanic("s390_irgen_VSUMG: invalid type "); +@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + 
case Ity_I32: + sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0000000000001111)); ++ mask = mkV128(0b0000000000001111); + break; + case Ity_I64: + sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0000000011111111)); ++ mask = mkV128(0b0000000011111111); + break; + default: + vpanic("s390_irgen_VSUMQ: invalid type "); +@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, + assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp))); + } + put_vr_qw(v1, mkite(mkexpr(cond), +- IRExpr_Const(IRConst_V128(0xffff)), +- IRExpr_Const(IRConst_V128(0)))); ++ mkV128(0xffff), ++ mkV128(0))); + if (s390_vr_is_cs_set(m6)) { + IRTemp cc = newTemp(Ity_I64); + assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); + +commit e78bd78d3043729033b426218ab8c6dae9c51e96 +Author: Andreas Arnez +Date: Thu Mar 18 18:01:10 2021 +0100 + + Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE + + The z/Architecture instructions "vector string range compare" (VSTRC), + "vector find any element equal" (VFAE), and "vector find element + equal" (VFEE) are each implemented with a dirty helper that executes the + instruction. Unfortunately this approach leads to memcheck false + positives, because these instructions may yield a defined result even if + parts of the input vectors are undefined. There are multiple ways this + can happen: Wherever the flags in the fourth operand to VSTRC indicate + "match always" or "match never", the corresponding elements in the third + operand don't affect the result. The same is true for the elements + following the first zero-element in the second operand if the ZS flag is + set, or for the elements following the first matching element, if any. + + Re-implement the instructions without dirty helpers and transform into + lengthy IR instead. 
+ +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index 905429015..49b6cd5dd 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,11 +265,8 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VFAE, +- S390_VEC_OP_VFEE, + S390_VEC_OP_VFENE, + S390_VEC_OP_VISTR, +- S390_VEC_OP_VSTRC, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, + S390_VEC_OP_VGFM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index b71b621ae..63d2e8ce5 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VFAE] = {0xe7, 0x82}, +- [S390_VEC_OP_VFEE] = {0xe7, 0x80}, + [S390_VEC_OP_VFENE] = {0xe7, 0x81}, + [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, +- [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, + [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, +@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + + case S390_VEC_OP_VPKS: + case S390_VEC_OP_VPKLS: +- case S390_VEC_OP_VFAE: +- case S390_VEC_OP_VFEE: + case S390_VEC_OP_VFENE: + case S390_VEC_OP_VCEQ: + case S390_VEC_OP_VGFM: +@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + the_insn.VRR.m5 = d->m5; + break; + +- case S390_VEC_OP_VSTRC: + case S390_VEC_OP_VGFMA: + case S390_VEC_OP_VMAH: + case S390_VEC_OP_VMALH: +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 7d54cb551..26a947813 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2) + return "ppno"; + } + +-static const HChar * +-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +-{ +- IRDirty* d; +- IRTemp cc = 
newTemp(Ity_I64); ++enum s390_VStrX { ++ s390_VStrX_VSTRC, ++ s390_VStrX_VFAE, ++ s390_VStrX_VFEE ++}; + +- /* Check for specification exception */ +- vassert(m4 < 3); ++#define S390_VEC_OP3(m, op0, op1, op2) \ ++ (m) == 0 ? op0 : (m) == 1 ? op1 : (m) == 2 ? op2 : Iop_INVALID; + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFAE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++/* Helper function for transforming VSTRC, VFAE, or VFEE. These instructions ++ share much of the same logic. */ ++static void ++s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, ++ UChar m6, enum s390_VStrX which_insn) ++{ ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRExpr* tmp; ++ IRExpr* match = NULL; ++ UChar bitwidth = 8 << m5; ++ UChar n_elem = 16 >> m5; ++ IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4); ++ IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4); ++ IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4); ++ IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4); ++ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, ++ Iop_CmpEQ16x8, Iop_CmpEQ32x4); ++ IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16, ++ Iop_CmpGT16Ux8, Iop_CmpGT32Ux4); ++ IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16, ++ Iop_GetElem16x8, Iop_GetElem32x4); ++ ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); ++ ++ switch (which_insn) { ++ ++ case s390_VStrX_VSTRC: { ++ IRTemp op4 = newTemp(Ity_V128); ++ assign(op4, get_vr_qw(v4)); ++ ++ /* Mask off insignificant range boundaries from op3, i.e., all those for ++ which the corresponding field in op4 has all or no bits set ("match ++ always" / "match never"). 
*/ ++ IRTemp bounds = newTemp(Ity_V128); ++ tmp = unop(Iop_NotV128, ++ binop(cmpeq_op, mkV128(0), ++ binop(sar_op, ++ binop(sub_op, ++ binop(sar_op, mkexpr(op4), ++ mkU8(bitwidth - 3)), ++ mkV128(-1)), ++ mkU8(1)))); ++ assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp)); ++ ++ IRTemp flags_eq = newTemp(Ity_V128); ++ IRTemp flags_lt = newTemp(Ity_V128); ++ IRTemp flags_gt = newTemp(Ity_V128); ++ assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1))); ++ assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)), ++ mkU8(bitwidth - 1))); ++ assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)), ++ mkU8(bitwidth - 1))); ++ ++ for (UChar idx = 0; idx < n_elem; idx += 2) { ++ /* Match according to the even/odd pairs in op3 and op4 at idx */ ++ IRTemp part[2]; ++ ++ for (UChar j = 0; j < 2; j++) { ++ IRTemp a = newTemp(Ity_V128); ++ assign(a, unop(dup_op, ++ binop(getelem_op, mkexpr(bounds), mkU8(idx + j)))); ++ ++ IRExpr* m[] = { ++ binop(cmpeq_op, mkexpr(op2), mkexpr(a)), ++ binop(cmpgt_op, mkexpr(a), mkexpr(op2)), ++ binop(cmpgt_op, mkexpr(op2), mkexpr(a)) ++ }; ++ IRExpr* f[] = { ++ unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))), ++ unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))), ++ unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j))) ++ }; ++ part[j] = newTemp(Ity_V128); ++ assign(part[j], binop(Iop_OrV128, ++ binop(Iop_OrV128, ++ binop(Iop_AndV128, f[0], m[0]), ++ binop(Iop_AndV128, f[1], m[1])), ++ binop(Iop_AndV128, f[2], m[2]))); ++ } ++ tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1])); ++ match = idx == 0 ? 
tmp : binop(Iop_OrV128, match, tmp); ++ } ++ break; ++ } + +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ case s390_VStrX_VFAE: ++ for (UChar idx = 0; idx < n_elem; idx++) { ++ IRTemp a = newTemp(Ity_V128); ++ assign(a, binop(cmpeq_op, mkexpr(op2), ++ unop(dup_op, ++ binop(getelem_op, mkexpr(op3), mkU8(idx))))); ++ match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a)); ++ } ++ break; + +- stmt(IRStmt_Dirty(d)); ++ case s390_VStrX_VFEE: ++ match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3)); ++ break; + +- if (s390_vr_is_cs_set(m5)) { +- s390_cc_set(cc); ++ default: ++ vpanic("s390_irgen_VStrX: unknown insn"); + } + +- return "vfae"; +-} ++ /* Invert first intermediate result if requested */ ++ if (m6 & 8) ++ match = unop(Iop_NotV128, match); + +-static const HChar * +-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +-{ +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); ++ IRTemp inter1 = newTemp(Ity_V128); ++ IRTemp inter2 = newTemp(Ity_V128); ++ IRTemp accu = newTemp(Ity_V128); ++ assign(inter1, match); + +- /* Check for specification exception */ +- vassert(m4 < 3); +- vassert((m5 & 0b1100) == 0); ++ /* Determine second intermediate and accumulated result */ ++ if (s390_vr_is_zs_set(m6)) { ++ assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0))); ++ assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2))); ++ } else { ++ assign(inter2, mkV128(0)); ++ assign(accu, mkexpr(inter1)); ++ } + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = 
S390_VEC_OP_VFEE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; ++ IRTemp accu0 = newTemp(Ity_I64); ++ IRTemp is_match0 = newTemp(Ity_I1); ++ IRTemp mismatch_bits = newTemp(Ity_I64); + +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ assign(accu0, unop(Iop_V128HIto64, mkexpr(accu))); ++ assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0))); ++ assign(mismatch_bits, unop(Iop_ClzNat64, ++ mkite(mkexpr(is_match0), mkexpr(accu0), ++ unop(Iop_V128to64, mkexpr(accu))))); + +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ if (m6 & 4) { ++ put_vr_qw(v1, mkexpr(inter1)); ++ } else { ++ /* Determine byte position of first match */ ++ tmp = binop(Iop_Add64, ++ binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)), ++ mkite(mkexpr(is_match0), mkU64(0), mkU64(8))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); ++ } + +- stmt(IRStmt_Dirty(d)); ++ if (s390_vr_is_cs_set(m6)) { ++ /* Set condition code depending on... 
++ zero found ++ n y ++ +------ ++ match n | 3 0 ++ found y | 1 2 */ + +- if (s390_vr_is_cs_set(m5)) { ++ IRTemp cc = newTemp(Ity_I64); ++ ++ tmp = binop(Iop_Shr64, ++ mkite(mkexpr(is_match0), ++ unop(Iop_V128HIto64, mkexpr(inter1)), ++ unop(Iop_V128to64, mkexpr(inter1))), ++ unop(Iop_64to8, ++ binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits)))); ++ tmp = binop(Iop_Shl64, tmp, mkU8(1)); ++ if (s390_vr_is_zs_set(m6)) { ++ tmp = binop(Iop_Xor64, tmp, ++ mkite(binop(Iop_ExpCmpNE64, mkU64(0), ++ binop(Iop_Or64, ++ unop(Iop_V128HIto64, mkexpr(inter2)), ++ unop(Iop_V128to64, mkexpr(inter2)))), ++ mkU64(0), ++ mkU64(3))); ++ } else { ++ tmp = binop(Iop_Xor64, tmp, mkU64(3)); ++ } ++ assign(cc, tmp); + s390_cc_set(cc); + } ++ dis_res->hint = Dis_HintVerbose; ++} + ++static const HChar * ++s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) ++{ ++ s390_insn_assert("vfae", m4 <= 2); ++ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE); ++ return "vfae"; ++} ++ ++static const HChar * ++s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) ++{ ++ s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3)); ++ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE); + return "vfee"; + } + +@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) + static const HChar * + s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- /* Check for specification exception */ +- vassert(m5 < 3); +- +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VSTRC; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.v4 = v4; +- details.m4 = m5; +- details.m5 = m6; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); +- +- d->nFxState = 4; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = 
Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Read; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); +- d->fxState[3].fx = Ifx_Write; +- d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[3].size = sizeof(V128); +- +- stmt(IRStmt_Dirty(d)); +- +- if (s390_vr_is_cs_set(m6)) { +- s390_cc_set(cc); +- } +- ++ s390_insn_assert("vstrc", m5 <= 2); ++ s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC); + return "vstrc"; + } + + +commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 +Author: Andreas Arnez +Date: Tue Mar 2 14:12:29 2021 +0100 + + Bug 434296 - s390x: Rework IR conversion of VFENE + + So far the z/Architecture instruction "vector find element not + equal" (VFENE) is transformed to a loop. This can cause spurious + "conditional jump or move depends on uninitialised value(s)" messages by + memcheck. Re-implement without a loop. 
+ +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index 49b6cd5dd..caec3108e 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,7 +265,6 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VFENE, + S390_VEC_OP_VISTR, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 63d2e8ce5..2188ce5c1 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VFENE] = {0xe7, 0x81}, + [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, +@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + + case S390_VEC_OP_VPKS: + case S390_VEC_OP_VPKLS: +- case S390_VEC_OP_VFENE: + case S390_VEC_OP_VCEQ: + case S390_VEC_OP_VGFM: + case S390_VEC_OP_VCH: +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 26a947813..c8dc3ec18 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + static const HChar * + s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- const Bool negateComparison = True; +- const IRType type = s390_vr_get_type(m4); ++ s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3)); + +- /* Check for specification exception */ +- vassert(m4 < 3); +- vassert((m5 & 0b1100) == 0); +- +- static const IROp elementGetters[] = { +- Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4 ++ static const IROp compare_op[3] = { ++ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 + }; +- IROp getter = elementGetters[m4]; +- +- static const IROp elementComparators[] = { +- 
Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32 ++ static const IROp abs_op[3] = { ++ Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4 + }; +- IROp comparator = elementComparators[m4]; +- +- static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32}; +- IROp converter = resultConverter[m4]; +- +- IRTemp isZeroElem; +- +- IRTemp counter = newTemp(Ity_I64); +- assign(counter, get_counter_dw0()); +- +- IRTemp arg1 = newTemp(type); +- assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter)))); +- IRTemp arg2 = newTemp(type); +- assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter)))); ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRTemp op2zero = newTemp(Ity_V128); ++ IRTemp diff = newTemp(Ity_V128); ++ IRTemp diff0 = newTemp(Ity_I64); ++ IRTemp neq0 = newTemp(Ity_I1); ++ IRTemp samebits = newTemp(Ity_I64); ++ IRExpr* tmp; + +- IRTemp isGoodPair = newTemp(Ity_I1); +- if(negateComparison) { +- assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1), +- mkexpr(arg2)))); +- } else { +- assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2))); +- } ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); + +- if(s390_vr_is_zs_set(m5)) { +- isZeroElem = newTemp(Ity_I1); +- assign(isZeroElem, binop(comparator, mkexpr(arg1), +- unop(converter, mkU64(0)))); ++ tmp = mkV128(0); ++ if (s390_vr_is_zs_set(m5)) { ++ tmp = binop(compare_op[m4], mkexpr(op2), tmp); ++ if (s390_vr_is_cs_set(m5) && v3 != v2) { ++ /* Count leading equal bits in the terminating element too */ ++ tmp = unop(abs_op[m4], tmp); ++ } ++ assign(op2zero, tmp); ++ tmp = mkexpr(op2zero); + } +- +- static const UChar invalidIndices[] = {16, 8, 4}; +- const UChar invalidIndex = invalidIndices[m4]; +- IRTemp endOfVectorIsReached = newTemp(Ity_I1); +- assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter), +- mkU64(invalidIndex))); +- +- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); +- IRExpr* shouldBreak = 
binop(Iop_Or32, +- unop(Iop_1Uto32, mkexpr(isGoodPair)), +- unop(Iop_1Uto32, mkexpr(endOfVectorIsReached)) +- ); +- if(s390_vr_is_zs_set(m5)) { +- shouldBreak = binop(Iop_Or32, +- shouldBreak, +- unop(Iop_1Uto32, mkexpr(isZeroElem))); +- } +- iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0))); +- +- IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1)); +- if(m4 > 0) { +- /* We should return index of byte but we found index of element in +- general case. +- if byte elem (m4 == 0) then indexOfByte = indexOfElement +- if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement +- = indexOfElement << 1 +- if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement +- = indexOfElement << 2 +- */ +- foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4)); ++ if (v3 != v2) { ++ tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3)); ++ if (s390_vr_is_zs_set(m5)) ++ tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero)); + } + +- IRTemp result = newTemp(Ity_I64); +- assign(result, mkite(mkexpr(endOfVectorIsReached), +- mkU64(16), +- foundIndex)); +- put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); ++ assign(diff, tmp); ++ assign(diff0, unop(Iop_V128HIto64, mkexpr(diff))); ++ assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0))); ++ assign(samebits, unop(Iop_ClzNat64, ++ mkite(mkexpr(neq0), mkexpr(diff0), ++ unop(Iop_V128to64, mkexpr(diff))))); + ++ /* Determine the byte size of the initial equal-elements sequence */ ++ tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3)); ++ if (m4 != 0) ++ tmp = binop(Iop_Shl64, tmp, mkU8(m4)); ++ tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); + + if (s390_vr_is_cs_set(m5)) { +- static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64}; +- IROp to64Converter = to64Converters[m4]; +- +- IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U, +- unop(to64Converter, mkexpr(arg1)), +- unop(to64Converter, mkexpr(arg2))); +- 
+- IRExpr* ccexp = mkite(binop(Iop_CmpEQ32, +- unop(Iop_1Uto32, mkexpr(isGoodPair)), +- mkU32(1)), +- mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)), +- mkU64(3)); +- +- if(s390_vr_is_zs_set(m5)) { +- IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2), +- unop(converter, mkU64(0))); +- IRExpr* bothArgsAreZero = binop(Iop_And32, +- unop(Iop_1Uto32, mkexpr(isZeroElem)), +- unop(Iop_1Uto32, arg2IsZero)); +- ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)), +- mkU64(0), +- ccexp); +- } ++ /* Set condition code like follows -- ++ 0: operands equal up to and including zero element ++ 1: op2 < op3 2: op2 > op3 3: op2 = op3 */ + IRTemp cc = newTemp(Ity_I64); +- assign(cc, ccexp); +- ++ if (v3 == v2) { ++ tmp = mkU64(0); ++ } else { ++ IRTemp shift = newTemp(Ity_I8); ++ IRExpr* op2half = mkite(mkexpr(neq0), ++ unop(Iop_V128HIto64, mkexpr(op2)), ++ unop(Iop_V128to64, mkexpr(op2))); ++ IRExpr* op3half = mkite(mkexpr(neq0), ++ unop(Iop_V128HIto64, mkexpr(op3)), ++ unop(Iop_V128to64, mkexpr(op3))); ++ assign(shift, unop(Iop_64to8, ++ binop(Iop_Sub64, mkU64(63), mkexpr(samebits)))); ++ tmp = binop(Iop_Or64, ++ binop(Iop_Shl64, ++ binop(Iop_And64, mkU64(1), ++ binop(Iop_Shr64, op2half, mkexpr(shift))), ++ mkU8(1)), ++ binop(Iop_And64, mkU64(1), ++ binop(Iop_Shr64, op3half, mkexpr(shift)))); ++ } ++ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)), ++ mkU64(3), tmp)); + s390_cc_set(cc); + } +- +- +- put_counter_dw0(mkU64(0)); ++ dis_res->hint = Dis_HintVerbose; + return "vfene"; + } + + +commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 +Author: Andreas Arnez +Date: Tue Apr 27 20:13:26 2021 +0200 + + Bug 434296 - s390x: Rework IR conversion of VISTR + + The z/Architecture instruction VISTR is currently transformed to a dirty + helper that executes the instruction. This can cause false positives with + memcheck if the input string contains undefined characters after the + string terminator. 
Implement without a dirty helper and emulate the + instruction instead. + +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index caec3108e..24f3798c1 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,7 +265,6 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VISTR, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, + S390_VEC_OP_VGFM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 2188ce5c1..1e04f601a 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, + [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, +@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + the_insn.VRR.op2 = opcodes[d->op][1]; + + switch(d->op) { +- case S390_VEC_OP_VISTR: +- the_insn.VRR.v1 = 1; +- the_insn.VRR.v2 = 2; +- the_insn.VRR.rxb = 0b1100; +- the_insn.VRR.m4 = d->m4; +- the_insn.VRR.m5 = d->m5; +- break; +- + case S390_VEC_OP_VTM: + the_insn.VRR.v1 = 2; + the_insn.VRR.v2 = 3; +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index c8dc3ec18..dfea54259 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + static const HChar * + s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) + { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- /* Check for specification exception */ +- vassert(m3 < 3); +- vassert((m5 & 0b1110) == 0); ++ s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1)); + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VISTR; +- 
details.v1 = v1; +- details.v2 = v2; +- details.m4 = m3; +- details.m5 = m5; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ static const IROp compare_op[3] = { ++ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 ++ }; ++ IRExpr* t; ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op2term = newTemp(Ity_V128); ++ IRTemp mask = newTemp(Ity_V128); + +- d->nFxState = 2; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Write; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); ++ assign(op2, get_vr_qw(v2)); ++ assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0))); ++ t = mkexpr(op2term); + +- stmt(IRStmt_Dirty(d)); ++ for (UChar i = m3; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i)))); ++ t = mkexpr(s); ++ } ++ assign(mask, unop(Iop_NotV128, t)); ++ put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask))); + + if (s390_vr_is_cs_set(m5)) { ++ IRTemp cc = newTemp(Ity_I64); ++ assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask)))); + s390_cc_set(cc); + } +- ++ dis_res->hint = Dis_HintVerbose; + return "vistr"; + } + + +commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 +Author: Andreas Arnez +Date: Fri Apr 16 12:44:44 2021 +0200 + + Bug 434296 - s390x: Add memcheck test cases for vector string insns + + Bug 434296 addresses memcheck false positives with the vector string + instructions VISTR, VSTRC, VFAE, VFEE, and VFENE. Add test cases that + verify the fix for that bug. Without the fix, memcheck yields many + complaints with these tests, most of which are false positives.
+ +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index e4e69eb38..d183841ef 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cdsg cu21 cu42 ltgjhe ++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr + + check_PROGRAMS = $(INSN_TESTS) + +@@ -14,3 +14,7 @@ EXTRA_DIST = \ + AM_CFLAGS += @FLAG_M64@ + AM_CXXFLAGS += @FLAG_M64@ + AM_CCASFLAGS += @FLAG_M64@ ++ ++vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 ++vfae_CFLAGS = $(AM_CFLAGS) -march=z13 ++vistr_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c +new file mode 100644 +index 000000000..68781e7fb +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.c +@@ -0,0 +1,72 @@ ++#include ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static char_v to_char_vec(const char *str) ++{ ++ char_v v; ++ char buf[17]; ++ int len = strlen(str); ++ ++ memcpy(buf, str, (len && str[len - 1] == '~') ? 
len - 1 : len + 1); ++ v = *(char_v *) buf; ++ return v; ++} ++ ++#define GENERATE_TEST(mnem) \ ++static void test_ ## mnem ## _char(const char *str, const char *match, \ ++ int expect_res, int expect_cc) \ ++{ \ ++ int cc; \ ++ char_v v1; \ ++ char_v v2 = to_char_vec(str); \ ++ char_v v3 = to_char_vec(match); \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ #mnem " %[v1],%[v2],%[v3],0,3\n\t" \ ++ "ipm %[cc]\n\t" \ ++ "srl %[cc],28" \ ++ : [v1] "=v" (v1), \ ++ [cc] "=d" (cc) \ ++ : [v2] "v" (v2), \ ++ [v3] "v" (v3) \ ++ : "cc"); \ ++ \ ++ tmp = hex_digit[v1[7] & 0x1f]; \ ++ if (expect_res >= 0 && v1[7] != expect_res) \ ++ printf("result %u != %d\n", v1[7], expect_res); \ ++ \ ++ tmp = hex_digit[cc & 0xf]; \ ++ if (expect_cc >= 0 && cc != expect_cc) \ ++ printf("CC %d != %d\n", cc, expect_cc); \ ++} ++ ++GENERATE_TEST(vfae) ++ ++GENERATE_TEST(vfee) ++ ++GENERATE_TEST(vfene) ++ ++int main() ++{ ++ test_vfae_char("not found", "................", 9, 0); ++ test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2); ++ test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1); ++ ++ test_vfee_char("same char here", "..........here", 10, 2); ++ test_vfee_char("and here too ...", "_________t~", 9, 1); ++ test_vfee_char("equality!~", "========!!~", 8, -1); ++ ++ test_vfene_char("strings equal", "strings equal", 13, 0); ++ test_vfene_char(hex_digit, hex_digit, 16, 3); ++ test_vfene_char("undef~", "undefined", -1, -1); ++ test_vfene_char("active~", "actually ok", 3, 1); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp +new file mode 100644 +index 000000000..8aad3c87f +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.stderr.exp +@@ -0,0 +1,20 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfae_char (vfae.c:51) ++ by 0x........: main (vfae.c:61) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfae_char (vfae.c:51) ++ by 0x........: main (vfae.c:61) ++ ++Use of uninitialised value of size 8 
++ at 0x........: test_vfee_char (vfae.c:53) ++ by 0x........: main (vfae.c:65) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfene_char (vfae.c:55) ++ by 0x........: main (vfae.c:69) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfene_char (vfae.c:55) ++ by 0x........: main (vfae.c:69) ++ +diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest +new file mode 100644 +index 000000000..ae36c22fe +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.vgtest +@@ -0,0 +1,2 @@ ++prog: vfae ++vgopts: -q +diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c +new file mode 100644 +index 000000000..7ed59b94b +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.c +@@ -0,0 +1,76 @@ ++#include ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdef"; ++ ++static char_v to_char_vec(const char *str, char_v *maskp) ++{ ++ char buf[17]; ++ char_v v; ++ char_v mask = {0}; ++ ++ for (int i = 0; i < sizeof(buf); i++) { ++ char ch = str[i]; ++ if (ch == '\0') ++ break; ++ else if (ch == '$') { ++ buf[i] = '\0'; ++ mask[i] = -1; ++ } else if (ch != '~') { ++ buf[i] = ch; ++ mask[i] = -1; ++ } ++ } ++ v = *(char_v *) buf; ++ *maskp = mask; ++ return v; ++} ++ ++static void test_vistr_char(const char *str, const char *expect_res, ++ int expect_cc) ++{ ++ int cc, count; ++ char_v v1, mask; ++ char_v v2 = to_char_vec(str, &mask); ++ char_v exp_v1 = to_char_vec(expect_res, &mask); ++ char equal[16]; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ "vistr %[v1],%[v2],0,1\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : [v1] "=v" (v1), ++ [cc] "=d" (cc) ++ : [v2] "v" (v2) ++ : "cc"); ++ ++ *(char_v *) equal = (v1 & mask) == (exp_v1 & mask); ++ if (memchr(equal, 0, 
sizeof(equal))) ++ printf("Result doesn't match `%s'\n", expect_res); ++ ++ count = 0; ++ for (int i = 0; i < 16; i++) { ++ if (v1[i] == 0) count++; ++ } ++ tmp = hex_digit[count]; ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ test_vistr_char("terminated$====~", "terminated$$$$$$", 0); ++ test_vistr_char("undef~~~~~~~~~~~", "undef", -1); ++ test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1); ++ test_vistr_char("Not. Terminated.", "Not. Terminated.", 3); ++ test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp +new file mode 100644 +index 000000000..e4f35fd74 +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.stderr.exp +@@ -0,0 +1,20 @@ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:71) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vistr_char (vistr.c:63) ++ by 0x........: main (vistr.c:71) ++ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:72) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vistr_char (vistr.c:63) ++ by 0x........: main (vistr.c:72) ++ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:74) ++ +diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest +new file mode 100644 +index 000000000..f99749d85 +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.vgtest +@@ -0,0 +1,2 @@ ++prog: vistr ++vgopts: -q +diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c +new file mode 100644 +index 000000000..268e2f858 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.c +@@ -0,0 +1,92 @@ ++#include 
++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++struct vstrc_char_rng { ++ unsigned char range[16]; ++ unsigned char flags[16]; ++}; ++ ++#define RNG_FLAG_EQ 0x80 ++#define RNG_FLAG_LT 0x40 ++#define RNG_FLAG_GT 0x20 ++#define RNG_FLAG_ANY 0xe0 ++#define RNG_FLAG_NONE 0x00 ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng, ++ int expect_res, int expect_cc) ++{ ++ int cc; ++ char_v v1; ++ char_v v2 = *(const char_v *) str; ++ char_v v3 = *(const char_v *) rng->range; ++ char_v v4 = *(const char_v *) rng->flags; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : [v1] "=v" (v1), ++ [cc] "=d" (cc) ++ : [v2] "v" (v2), ++ [v3] "v" (v3), ++ [v4] "v" (v4) ++ : "cc"); ++ ++ tmp = hex_digit[v1[7] & 0x1f]; ++ if (expect_res >= 0 && v1[7] != expect_res) ++ printf("result %u != %d\n", v1[7], expect_res); ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ struct vstrc_char_rng rng; ++ char buf[16]; ++ ++ memset(rng.flags, RNG_FLAG_NONE, 16); ++ ++ rng.range[4] = 'z'; ++ rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ; ++ rng.flags[5] = RNG_FLAG_ANY; ++ /* OK: match at the 'z' */ ++ test_vstrc_char("find the z", &rng, 9, 2); ++ ++ rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ; ++ rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ; ++ /* Bad: undefined range */ ++ test_vstrc_char("undefined", &rng, -1, -1); ++ ++ rng.range[12] = 'a'; ++ rng.range[13] = 'c'; ++ /* OK: match at the 'a' */ ++ test_vstrc_char("get the abc", &rng, 8, 2); ++ ++ rng.flags[12] = RNG_FLAG_LT; ++ rng.flags[13] = RNG_FLAG_GT; ++ /* OK: no match up to null terminator */ ++ test_vstrc_char("no match", &rng, 8, 0); ++ ++ /* OK: no match, no null terminator */ ++ 
test_vstrc_char("0123456789abcdef", &rng, 16, 3); ++ ++ buf[0] = 'x'; ++ /* Bad: undefined string */ ++ test_vstrc_char(buf, &rng, -1, -1); ++ ++ buf[1] = 'z'; ++ /* Bad: valid match, but CC undefined */ ++ test_vstrc_char(buf, &rng, 1, -1); ++ ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp +new file mode 100644 +index 000000000..c1125bea1 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.stderr.exp +@@ -0,0 +1,20 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:43) ++ by 0x........: main (vstrc.c:68) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:68) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:43) ++ by 0x........: main (vstrc.c:85) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:85) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:89) ++ +diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest +new file mode 100644 +index 000000000..26f5db99b +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.vgtest +@@ -0,0 +1,2 @@ ++prog: vstrc ++vgopts: -q + +commit a0bb049ace14ab52d386bb1d49a399f39eec4986 +Author: Andreas Arnez +Date: Tue Mar 23 14:55:09 2021 +0100 + + s390x: Improve handling of amodes without base register + + Addressing modes without a base or index register represent constants. + They can occur in some special cases such as shift operations and when + accessing individual vector elements. Perform some minor improvements to + the handling of such amodes. 
+ +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 6e0734ae0..2587f81a1 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am) + { + switch (am->tag) { + case S390_AMODE_B12: +- return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d); ++ return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) && ++ fits_unsigned_12bit(am->d); + + case S390_AMODE_B20: + return is_virtual_gpr(am->b) && fits_signed_20bit(am->d); +@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am) + } + } + ++static Bool ++s390_amode_is_constant(const s390_amode *am) ++{ ++ return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0)); ++} ++ + + /* Record the register use of an amode */ + static void + s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am) + { +- switch (am->tag) { +- case S390_AMODE_B12: +- case S390_AMODE_B20: +- addHRegUse(u, HRmRead, am->b); +- return; +- +- case S390_AMODE_BX12: +- case S390_AMODE_BX20: ++ if (!sameHReg(am->b, s390_hreg_gpr(0))) + addHRegUse(u, HRmRead, am->b); ++ if (!sameHReg(am->x, s390_hreg_gpr(0))) + addHRegUse(u, HRmRead, am->x); +- return; +- +- default: +- vpanic("s390_amode_get_reg_usage"); +- } + } + + + static void + s390_amode_map_regs(HRegRemap *m, s390_amode *am) + { +- switch (am->tag) { +- case S390_AMODE_B12: +- case S390_AMODE_B20: +- am->b = lookupHRegRemap(m, am->b); +- return; +- +- case S390_AMODE_BX12: +- case S390_AMODE_BX20: ++ if (!sameHReg(am->b, s390_hreg_gpr(0))) + am->b = lookupHRegRemap(m, am->b); ++ if (!sameHReg(am->x, s390_hreg_gpr(0))) + am->x = lookupHRegRemap(m, am->x); +- return; +- +- default: +- vpanic("s390_amode_map_regs"); +- } + } + + +@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) + insn->variant.alu.dst, vreg_opnd); + } + ++ /* v-vgetelem , */ ++ if (insn->tag == S390_INSN_VEC_AMODEOP ++ && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM ++ 
&& insn->size == 8 ++ && sameHReg(insn->variant.vec_amodeop.op1, vreg) ++ && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) { ++ vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d; ++ return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am); ++ } ++ + /* v- , */ + if (insn->tag == S390_INSN_UNOP + && insn->variant.unop.src.tag == S390_OPND_REG +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 5f79280c0..ceca6836e 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr, + Bool no_index __attribute__((unused)), + Bool short_displacement) + { +- if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { ++ if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 && ++ expr->Iex.Unop.arg->tag == Iex_Const) { ++ UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8; ++ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); ++ ++ } else if (expr->tag == Iex_Const) { ++ ULong value = expr->Iex.Const.con->Ico.U64; ++ if (ulong_fits_unsigned_12bit(value)) { ++ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); ++ } ++ ++ } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { + IRExpr *arg1 = expr->Iex.Binop.arg1; + IRExpr *arg2 = expr->Iex.Binop.arg2; + + +commit fd935e238d907d9c523a311ba795077d95ad6912 +Author: Andreas Arnez +Date: Fri Mar 26 19:27:47 2021 +0100 + + s390x: Rework insn "v-vdup" and add "v-vrep" + + So far the only s390x insn for filling a vector with copies of the same + element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first + element of its vector argument. This is fairly restrictive and can lead + to unnecessarily long code sequences. + + Redefine "v-vdup" to replicate any scalar value instead. And add + "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a + vector. 
Select the latter for suitable expressions like + + Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i)) + + This improves the generated code for some vector string instructions, + where a lot of element replications are performed. + +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 2587f81a1..c764d6ef9 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) + insn->variant.unop.dst, vreg_opnd); + } + ++ /* v-vrep ,, */ ++ if (insn->tag == S390_INSN_VEC_REPLICATE ++ && sameHReg(insn->variant.vec_replicate.op1, vreg)) { ++ vreg_am->d += insn->size * insn->variant.vec_replicate.idx; ++ return s390_insn_unop(insn->size, S390_VEC_DUPLICATE, ++ insn->variant.vec_replicate.dst, vreg_opnd); ++ } ++ + no_match: + return NULL; + } +@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) + addHRegUse(u, HRmRead, insn->variant.vec_triop.op3); + break; + ++ case S390_INSN_VEC_REPLICATE: ++ addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst); ++ addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1); ++ break; ++ + default: + vpanic("s390_insn_get_reg_usage"); + } +@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) + insn->variant.vec_triop.op3 = + lookupHRegRemap(m, insn->variant.vec_triop.op3); + break; ++ ++ case S390_INSN_VEC_REPLICATE: ++ insn->variant.vec_replicate.dst = ++ lookupHRegRemap(m, insn->variant.vec_replicate.dst); ++ insn->variant.vec_replicate.op1 = ++ lookupHRegRemap(m, insn->variant.vec_replicate.op1); ++ break; ++ + default: + vpanic("s390_insn_map_regs"); + } +@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2) + + + static UChar * +-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) ++emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3) ++{ ++ ULong the_insn = op; ++ ULong rxb = s390_update_rxb(0, 1, &v1); ++ ++ the_insn |= ((ULong)v1) 
<< 36; ++ the_insn |= ((ULong)i2) << 16; ++ the_insn |= ((ULong)m3) << 12; ++ the_insn |= ((ULong)rxb)<< 8; ++ ++ return emit_6bytes(p, the_insn); ++} ++ ++ ++static UChar * ++emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4) ++{ ++ ULong the_insn = op; ++ ULong rxb = s390_update_rxb(0, 1, &v1); ++ rxb = s390_update_rxb(rxb, 2, &v3); ++ ++ the_insn |= ((ULong)v1) << 36; ++ the_insn |= ((ULong)v3) << 32; ++ the_insn |= ((ULong)i2) << 16; ++ the_insn |= ((ULong)m4) << 12; ++ the_insn |= ((ULong)rxb) << 8; ++ ++ return emit_6bytes(p, the_insn); ++} ++ ++ ++static UChar * ++emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3) + { + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); +@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) + the_insn |= ((ULong)x2) << 32; + the_insn |= ((ULong)b2) << 28; + the_insn |= ((ULong)d2) << 16; ++ the_insn |= ((ULong)m3) << 12; + the_insn |= ((ULong)rxb)<< 8; + + return emit_6bytes(p, the_insn); +@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2); + +- return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2); ++ return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0); + } + + static UChar * +@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2) + } + + ++static UChar * ++s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3); ++ ++ return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3); ++} ++ ++ + static UChar * + s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) + { + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2); + +- return emit_VRX(p, 0xE7000000000eULL, v1, 
x2, b2, d2); ++ return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0); + } + + +@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) + + + static UChar * +-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3) ++s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4) + { + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) +- s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3); ++ s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4); + +- return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3); ++ return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4); + } + + ++static UChar * ++s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3); ++ ++ return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3); ++} ++ + + static UChar * + s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3) +@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, + return insn; + } + ++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, ++ UChar idx) ++{ ++ s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); ++ ++ insn->tag = S390_INSN_VEC_REPLICATE; ++ insn->size = size; ++ insn->variant.vec_replicate.dst = dst; ++ insn->variant.vec_replicate.op1 = op1; ++ insn->variant.vec_replicate.idx = idx; ++ ++ return insn; ++} ++ + /*---------------------------------------------------------------*/ + /*--- Debug print ---*/ + /*---------------------------------------------------------------*/ +@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn) + insn->variant.vec_triop.op3); + break; + ++ case S390_INSN_VEC_REPLICATE: ++ s390_sprintf(buf, "%M %R, %R, %I", "v-vrep", ++ insn->variant.vec_replicate.dst, ++ insn->variant.vec_replicate.op1, ++ insn->variant.vec_replicate.idx); ++ break; ++ + default: goto fail; + } + +@@ -9386,6 +9480,56 @@ 
s390_negate_emit(UChar *buf, const s390_insn *insn) + } + + ++static UChar * ++s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn) ++{ ++ UChar v1 = hregNumber(insn->variant.unop.dst); ++ s390_opnd_RMI opnd = insn->variant.unop.src; ++ UChar r2; ++ ++ switch (opnd.tag) { ++ case S390_OPND_AMODE: { ++ s390_amode* am = opnd.variant.am; ++ UInt b = hregNumber(am->b); ++ UInt x = hregNumber(am->x); ++ UInt d = am->d; ++ ++ if (fits_unsigned_12bit(d)) { ++ return s390_emit_VLREP(buf, v1, x, b, d, ++ s390_getM_from_size(insn->size)); ++ } ++ buf = s390_emit_load_mem(buf, insn->size, R0, am); ++ r2 = R0; ++ goto duplicate_from_gpr; ++ } ++ ++ case S390_OPND_IMMEDIATE: { ++ ULong val = opnd.variant.imm; ++ ++ if (ulong_fits_signed_16bit(val)) { ++ return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size)); ++ } ++ buf = s390_emit_load_64imm(buf, R0, val); ++ r2 = R0; ++ goto duplicate_from_gpr; ++ } ++ ++ case S390_OPND_REG: ++ r2 = hregNumber(opnd.variant.reg); ++ ++ duplicate_from_gpr: ++ buf = s390_emit_VLVGP(buf, v1, r2, r2); ++ if (insn->size != 8) { ++ buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1, ++ s390_getM_from_size(insn->size)); ++ } ++ return buf; ++ } ++ ++ vpanic("s390_vec_duplicate_emit"); ++} ++ ++ + static UChar * + s390_insn_unop_emit(UChar *buf, const s390_insn *insn) + { +@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) + UShort i2 = insn->variant.unop.src.variant.imm; + return s390_emit_VGBM(buf, v1, i2); + } +- case S390_VEC_DUPLICATE: { +- vassert(insn->variant.unop.src.tag == S390_OPND_REG); +- UChar v1 = hregNumber(insn->variant.unop.dst); +- UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); +- return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size)); +- } ++ case S390_VEC_DUPLICATE: return s390_vec_duplicate_emit(buf, insn); + case S390_VEC_UNPACKLOWS: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size < 8); +@@ -11595,6 +11734,16 @@ 
s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) + } + + ++static UChar * ++s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn) ++{ ++ UChar v1 = hregNumber(insn->variant.vec_replicate.dst); ++ UChar v2 = hregNumber(insn->variant.vec_replicate.op1); ++ UShort idx = (UShort) insn->variant.vec_replicate.idx; ++ return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size)); ++} ++ ++ + Int + emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, + Bool mode64, VexEndness endness_host, +@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, + case S390_INSN_VEC_TRIOP: + end = s390_insn_vec_triop_emit(buf, insn); + break; ++ ++ case S390_INSN_VEC_REPLICATE: ++ end = s390_insn_vec_replicate_emit(buf, insn); ++ break; ++ + fail: + default: + vpanic("emit_S390Instr"); +diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h +index 9b69f4d38..063fd3800 100644 +--- a/VEX/priv/host_s390_defs.h ++++ b/VEX/priv/host_s390_defs.h +@@ -166,7 +166,8 @@ typedef enum { + S390_INSN_VEC_AMODEINTOP, + S390_INSN_VEC_UNOP, + S390_INSN_VEC_BINOP, +- S390_INSN_VEC_TRIOP ++ S390_INSN_VEC_TRIOP, ++ S390_INSN_VEC_REPLICATE + } s390_insn_tag; + + +@@ -738,6 +739,11 @@ typedef struct { + HReg op2; /* 128-bit second operand */ + HReg op3; /* 128-bit third operand */ + } vec_triop; ++ struct { ++ HReg dst; /* 128-bit result */ ++ HReg op1; /* 128-bit first operand */ ++ UChar idx; /* index of element to replicate */ ++ } vec_replicate; + } variant; + } s390_insn; + +@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1, + HReg op2); + s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1, + HReg op2, HReg op3); ++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx); + + const HChar *s390_insn_as_string(const s390_insn *); + +diff --git a/VEX/priv/host_s390_isel.c 
b/VEX/priv/host_s390_isel.c +index ceca6836e..968122596 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + /* --------- UNARY OP --------- */ + case Iex_Unop: { +- UChar size_for_int_arg = 0; + HReg dst = INVALID_HREG; + HReg reg1 = INVALID_HREG; + s390_unop_t vec_unop = S390_UNOP_T_INVALID; + s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID; + IROp op = expr->Iex.Unop.op; ++ IROp arg_op = Iop_INVALID; + IRExpr* arg = expr->Iex.Unop.arg; + switch(op) { + case Iop_NotV128: +@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + + case Iop_Dup8x16: +- size = size_for_int_arg = 1; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 1; ++ arg_op = Iop_GetElem8x16; ++ goto Iop_V_dup_wrk; + case Iop_Dup16x8: +- size = size_for_int_arg = 2; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 2; ++ arg_op = Iop_GetElem16x8; ++ goto Iop_V_dup_wrk; + case Iop_Dup32x4: +- size = size_for_int_arg = 4; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 4; ++ arg_op = Iop_GetElem32x4; ++ goto Iop_V_dup_wrk; ++ ++ Iop_V_dup_wrk: { ++ dst = newVRegV(env); ++ if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op && ++ arg->Iex.Binop.arg2->tag == Iex_Const) { ++ ULong idx; ++ idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con); ++ reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1); ++ addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx)); ++ } else { ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); ++ addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src)); ++ } ++ return dst; ++ } + + case Iop_Widen8Sto16x8: + size = 1; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen16Sto32x4: + size = 2; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto 
Iop_V_widen_wrk; + case Iop_Widen32Sto64x2: + size = 4; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen8Uto16x8: + size = 1; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen16Uto32x4: + size = 2; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen32Uto64x2: + size = 4; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; +- +- Iop_V_int_wrk: { +- HReg vr1 = vec_generate_zeroes(env); +- s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0))); +- reg1 = s390_isel_int_expr(env, arg); ++ goto Iop_V_widen_wrk; + ++ Iop_V_widen_wrk: { + vassert(vec_unop != S390_UNOP_T_INVALID); +- addInstr(env, +- s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM, +- vr1, amode2, reg1)); +- ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); ++ HReg vr1 = newVRegV(env); ++ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src)); + dst = newVRegV(env); + addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1))); + return dst; + +commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 +Author: Andreas Arnez +Date: Thu Mar 25 18:48:07 2021 +0100 + + s390x: Add support for emitting "vector or with complement" + + In the instruction selector, look out for IR expressions that fit "vector + or with complement (VOC)". Emit when applicable. + + This slightly reduces the generated code sometimes, such as for certain + vector string instructions, where such expressions occur quite frequently.
+ +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index c764d6ef9..239d9d299 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3) + return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3); + } + ++static UChar * ++s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3); ++ ++ return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3); ++} ++ + static UChar * + s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3) + { +@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn) + case S390_VEC_PACK_SATURU: op = "v-vpacksaturu"; break; + case S390_VEC_COMPARE_EQUAL: op = "v-vcmpeq"; break; + case S390_VEC_OR: op = "v-vor"; break; ++ case S390_VEC_ORC: op = "v-vorc"; break; + case S390_VEC_XOR: op = "v-vxor"; break; + case S390_VEC_AND: op = "v-vand"; break; + case S390_VEC_MERGEL: op = "v-vmergel"; break; +@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) + return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_OR: + return s390_emit_VO(buf, v1, v2, v3); ++ case S390_VEC_ORC: ++ return s390_emit_VOC(buf, v1, v2, v3); + case S390_VEC_XOR: + return s390_emit_VX(buf, v1, v2, v3); + case S390_VEC_AND: +diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h +index 063fd3800..dc116106e 100644 +--- a/VEX/priv/host_s390_defs.h ++++ b/VEX/priv/host_s390_defs.h +@@ -366,6 +366,7 @@ typedef enum { + S390_VEC_PACK_SATURU, + S390_VEC_COMPARE_EQUAL, + S390_VEC_OR, ++ S390_VEC_ORC, + S390_VEC_XOR, + S390_VEC_AND, + S390_VEC_MERGEL, +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 968122596..53d76fe8a 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + case Iop_OrV128: 
+ size = 16; + vec_binop = S390_VEC_OR; ++ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { ++ IRExpr* orig_arg1 = arg1; ++ arg1 = arg2; ++ arg2 = orig_arg1->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { ++ arg2 = arg2->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } + goto Iop_VV_wrk; + + case Iop_XorV128: + +commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 +Author: Andreas Arnez +Date: Tue Mar 30 17:45:20 2021 +0200 + + s390x: Fix/optimize Iop_64HLtoV128 + + In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies + of a 64-bit value is realized with Iop_64HLtoV128, since there is no such + operator as Iop_Dup64x2. But the two args to Iop_64HLtoV128 use the same + expression, referenced twice. Although this hasn't been seen to cause + real trouble yet, it's problematic and potentially inefficient, so change + it: Assign to a temp and pass that twice instead. + + In the instruction selector, if Iop_64HLtoV128 is found to be used for a + duplication as above, select "v-vdup" instead of "v-vinitfromgprs". This + mimics the behavior we'd get if there actually was an operator + Iop_Dup64x2. 
+ +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index dfea54259..a73dcfb14 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2) + case Ity_I32: + put_vr_qw(v1, unop(Iop_Dup32x4, o2)); + break; +- case Ity_I64: +- put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2)); ++ case Ity_I64: { ++ IRTemp val = newTemp(Ity_I64); ++ assign(val, o2); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val))); + break; ++ } + default: + ppIRType(o2type); + vpanic("s390_vr_fill: invalid IRType"); +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 53d76fe8a..ee20c6711 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + + case Iop_64HLtoV128: +- reg1 = s390_isel_int_expr(env, arg1); +- reg2 = s390_isel_int_expr(env, arg2); +- +- addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, +- dst, reg1, reg2)); +- ++ if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp && ++ arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) { ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1); ++ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src)); ++ } else { ++ reg1 = s390_isel_int_expr(env, arg1); ++ reg2 = s390_isel_int_expr(env, arg2); ++ addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, ++ dst, reg1, reg2)); ++ } + return dst; + + default: + +commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a +Author: Andreas Arnez +Date: Fri May 7 18:13:03 2021 +0200 + + s390x: Add missing stdout.exp for vector string memcheck test + + The file vistr.stdout.exp was missing from commit 32312d588. Add it. 
+ +diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp +new file mode 100644 +index 000000000..e69de29bb diff --git a/valgrind-3.17.0-s390-z13-vec-fix.patch b/valgrind-3.17.0-s390-z13-vec-fix.patch new file mode 100644 index 0000000..959e5f8 --- /dev/null +++ b/valgrind-3.17.0-s390-z13-vec-fix.patch @@ -0,0 +1,46 @@ +commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb +Author: Andreas Arnez +Date: Mon Jun 7 14:01:53 2021 +0200 + + s390x: Don't emit "vector or with complement" on z13 + + The z/Architecture instruction "vector or with complement" (VOC) can be + used as an optimization to combine "vector or" with "vector nor". This is + exploited in Valgrind since commit 6c1cb1a0128b00858b973e. However, VOC + requires the vector-enhancements facility 1, which is not installed on a + z13 CPU. Thus Valgrind can now run into SIGILL on z13 when trying to + execute vector string instructions. + + Fix this by suppressing the VOC optimization unless the + vector-enhancements facility 1 is recognized on the host. 
+ +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index ee20c6711..15ca92a6b 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4102,14 +4102,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + case Iop_OrV128: + size = 16; + vec_binop = S390_VEC_OR; +- if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { +- IRExpr* orig_arg1 = arg1; +- arg1 = arg2; +- arg2 = orig_arg1->Iex.Unop.arg; +- vec_binop = S390_VEC_ORC; +- } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { +- arg2 = arg2->Iex.Unop.arg; +- vec_binop = S390_VEC_ORC; ++ if (s390_host_has_vxe) { ++ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { ++ IRExpr* orig_arg1 = arg1; ++ arg1 = arg2; ++ arg2 = orig_arg1->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } else if (arg2->tag == Iex_Unop && ++ arg2->Iex.Unop.op == Iop_NotV128) { ++ arg2 = arg2->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } + } + goto Iop_VV_wrk; + diff --git a/valgrind-3.17.0-s390-z15.patch b/valgrind-3.17.0-s390-z15.patch new file mode 100644 index 0000000..2ec3c2f --- /dev/null +++ b/valgrind-3.17.0-s390-z15.patch @@ -0,0 +1,2413 @@ +From 3fbde55a5696c9273084ee2c44daca752e407597 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 26 Jan 2021 15:06:47 +0100 +Subject: [PATCH 01/13] s390x: Misc-insn-3, bitwise logical 3-way instructions + +Add support for the instructions NCRK, NCGRK, NNRK, NNGRK, NORK, NOGRK, +NXRK, NXGRK, OCRK, and OCGRK. Introduce a common helper and use it for +the existing instructions NRK, NGRK, XRK, XGRK, ORK, and OGRK as well. 
+--- + VEX/priv/guest_s390_toIR.c | 154 ++++++++++++++++++++++++++----------- + 1 file changed, 109 insertions(+), 45 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index a73dcfb14..f8afd5b96 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -5022,8 +5022,12 @@ s390_irgen_NGR(UChar r1, UChar r2) + return "ngr"; + } + ++/* Helper for bitwise logical instructions with two 32-bit input operands and a ++ 32-bit output operand. `inv3' and `inv' indicate whether to invert (build ++ bitwise complement of) operand 3 or the result, respectively. */ + static const HChar * +-s390_irgen_NRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_logicalK32(UChar r3, UChar r1, UChar r2, ++ const HChar *mnem, IROp op, Bool inv3, Bool inv) + { + IRTemp op2 = newTemp(Ity_I32); + IRTemp op3 = newTemp(Ity_I32); +@@ -5031,15 +5035,19 @@ s390_irgen_NRK(UChar r3, UChar r1, UChar r2) + + assign(op2, get_gpr_w1(r2)); + assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3))); ++ IRExpr* tmp = binop(op, mkexpr(op2), ++ inv3 ? unop(Iop_Not32, mkexpr(op3)) : mkexpr(op3)); ++ assign(result, inv ? unop(Iop_Not32, tmp) : tmp); + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); + put_gpr_w1(r1, mkexpr(result)); + +- return "nrk"; ++ return mnem; + } + ++/* Same as s390_irgen_logicalK32, but for 64-bit operands. */ + static const HChar * +-s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_logicalK64(UChar r3, UChar r1, UChar r2, ++ const HChar *mnem, IROp op, Bool inv3, Bool inv) + { + IRTemp op2 = newTemp(Ity_I64); + IRTemp op3 = newTemp(Ity_I64); +@@ -5047,11 +5055,49 @@ s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) + + assign(op2, get_gpr_dw0(r2)); + assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3))); ++ IRExpr* tmp = binop(op, mkexpr(op2), ++ inv3 ? unop(Iop_Not64, mkexpr(op3)) : mkexpr(op3)); ++ assign(result, inv ? 
unop(Iop_Not64, tmp) : tmp); + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); + put_gpr_dw0(r1, mkexpr(result)); + +- return "ngrk"; ++ return mnem; ++} ++ ++static const HChar * ++s390_irgen_NRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nrk", Iop_And32, False, False); ++} ++ ++static const HChar * ++s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ngrk", Iop_And64, False, False); ++} ++ ++static const HChar * ++s390_irgen_NCRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "ncrk", Iop_And32, True, False); ++} ++ ++static const HChar * ++s390_irgen_NCGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ncgrk", Iop_And64, True, False); ++} ++ ++static const HChar * ++s390_irgen_NNRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nnrk", Iop_And32, False, True); ++} ++ ++static const HChar * ++s390_irgen_NNGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nngrk", Iop_And64, False, True); + } + + static const HChar * +@@ -7071,33 +7117,25 @@ s390_irgen_XGR(UChar r1, UChar r2) + static const HChar * + s390_irgen_XRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I32); +- IRTemp op3 = newTemp(Ity_I32); +- IRTemp result = newTemp(Ity_I32); +- +- assign(op2, get_gpr_w1(r2)); +- assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_w1(r1, mkexpr(result)); +- +- return "xrk"; ++ return s390_irgen_logicalK32(r3, r1, r2, "xrk", Iop_Xor32, False, False); + } + + static const HChar * + s390_irgen_XGRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I64); +- IRTemp op3 = newTemp(Ity_I64); +- IRTemp result = newTemp(Ity_I64); ++ return s390_irgen_logicalK64(r3, r1, r2, "xgrk", Iop_Xor64, False, False); ++} + +- assign(op2, get_gpr_dw0(r2)); +- 
assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_dw0(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_NXRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nxrk", Iop_Xor32, False, True); ++} + +- return "xgrk"; ++static const HChar * ++s390_irgen_NXGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nxgrk", Iop_Xor64, False, True); + } + + static const HChar * +@@ -8920,33 +8958,37 @@ s390_irgen_OGR(UChar r1, UChar r2) + static const HChar * + s390_irgen_ORK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I32); +- IRTemp op3 = newTemp(Ity_I32); +- IRTemp result = newTemp(Ity_I32); ++ return s390_irgen_logicalK32(r3, r1, r2, "ork", Iop_Or32, False, False); ++} + +- assign(op2, get_gpr_w1(r2)); +- assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_w1(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ogrk", Iop_Or64, False, False); ++} + +- return "ork"; ++static const HChar * ++s390_irgen_OCRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "ocrk", Iop_Or32, True, False); + } + + static const HChar * +-s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_OCGRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I64); +- IRTemp op3 = newTemp(Ity_I64); +- IRTemp result = newTemp(Ity_I64); ++ return s390_irgen_logicalK64(r3, r1, r2, "ocgrk", Iop_Or64, True, False); ++} + +- assign(op2, get_gpr_dw0(r2)); +- assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_dw0(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_NORK(UChar r3, UChar r1, UChar 
r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nork", Iop_Or32, False, True); ++} + +- return "ogrk"; ++static const HChar * ++s390_irgen_NOGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nogrk", Iop_Or64, False, True); + } + + static const HChar * +@@ -20031,12 +20073,28 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb961: s390_format_RRF_U0RR(s390_irgen_CLGRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; ++ case 0xb964: s390_format_RRF_R0RR2(s390_irgen_NNGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb965: s390_format_RRF_R0RR2(s390_irgen_OCGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb966: s390_format_RRF_R0RR2(s390_irgen_NOGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb967: s390_format_RRF_R0RR2(s390_irgen_NXGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; + case 0xb972: s390_format_RRF_U0RR(s390_irgen_CRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; + case 0xb973: s390_format_RRF_U0RR(s390_irgen_CLRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; ++ case 0xb974: s390_format_RRF_R0RR2(s390_irgen_NNRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb975: s390_format_RRF_R0RR2(s390_irgen_OCRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb976: s390_format_RRF_R0RR2(s390_irgen_NORK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb977: s390_format_RRF_R0RR2(s390_irgen_NXRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; + case 0xb980: s390_format_RRE_RR(s390_irgen_NGR, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; + case 0xb981: s390_format_RRE_RR(s390_irgen_OGR, RRE_r1(ovl), +@@ -20148,6 +20206,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9e5: 
s390_format_RRF_R0RR2(s390_irgen_NCGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); ++ goto ok; + case 0xb9e6: s390_format_RRF_R0RR2(s390_irgen_OGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20178,6 +20239,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9f4: s390_format_RRF_R0RR2(s390_irgen_NRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9f5: s390_format_RRF_R0RR2(s390_irgen_NCRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); ++ goto ok; + case 0xb9f6: s390_format_RRF_R0RR2(s390_irgen_ORK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +-- +2.23.0 + +From 748421b31ab6b15cc849bd6b9588ad759b807324 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 27 Jan 2021 18:11:06 +0100 +Subject: [PATCH 02/13] s390x: Misc-insn-3, "select" instructions + +Add support for the instructions SELR, SELGR, and SELFHR. +--- + VEX/priv/guest_s390_toIR.c | 43 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index f8afd5b96..41265631b 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3113,6 +3113,16 @@ s390_format_RRF_FUFF2(const HChar *(*irgen)(UChar, UChar, UChar, UChar), + s390_disasm(ENC5(MNM, FPR, FPR, FPR, UINT), mnm, r1, r2, r3, m4); + } + ++static void ++s390_format_RRF_RURR(const HChar *(*irgen)(UChar, UChar, UChar, UChar), ++ UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ const HChar *mnm = irgen(r3, m4, r1, r2); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) ++ s390_disasm(ENC5(MNM, GPR, GPR, GPR, UINT), mnm, r1, r3, r2, m4); ++} ++ + static void + s390_format_RRF_R0RR2(const HChar *(*irgen)(UChar r3, UChar r1, UChar r2), + UChar r3, UChar r1, UChar r2) +@@ -19254,6 +19264,30 @@ s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3) + return "vbperm"; + } + ++static const HChar * ++s390_irgen_SELR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, 
s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_w1(r1, mkite(cond, get_gpr_w1(r2), get_gpr_w1(r3))); ++ return "selr"; ++} ++ ++static const HChar * ++s390_irgen_SELGR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_dw0(r1, mkite(cond, get_gpr_dw0(r2), get_gpr_dw0(r3))); ++ return "selgr"; ++} ++ ++static const HChar * ++s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_w0(r1, mkite(cond, get_gpr_w0(r2), get_gpr_w0(r3))); ++ return "selfhr"; ++} ++ + /* New insns are added here. + If an insn is contingent on a facility being installed also + check whether the list of supported facilities in function +@@ -20163,6 +20197,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9bd: /* TRTRE */ goto unimplemented; + case 0xb9be: /* SRSTU */ goto unimplemented; + case 0xb9bf: /* TRTE */ goto unimplemented; ++ case 0xb9c0: s390_format_RRF_RURR(s390_irgen_SELFHR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + case 0xb9c8: s390_format_RRF_R0RR2(s390_irgen_AHHHR, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20203,6 +20240,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCGR); goto ok; ++ case 0xb9e3: s390_format_RRF_RURR(s390_irgen_SELGR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20233,6 +20273,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9ed: s390_format_RRF_R0RR2(s390_irgen_MSGRKC, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9f0: s390_format_RRF_RURR(s390_irgen_SELR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + 
case 0xb9f2: s390_format_RRF_U0RR(s390_irgen_LOCR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCR); goto ok; +-- +2.23.0 + +From 31cbd583e858f47a86ada087d21a6abc13ba04f2 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 28 Jan 2021 19:47:00 +0100 +Subject: [PATCH 03/13] s390x: Misc-insn-3, new POPCNT variant + +Add support for the new POPCNT variant that has bit 0 of the M3 field set +and yields the total number of one bits in its 64-bit operand. +--- + VEX/priv/guest_s390_toIR.c | 44 ++++++++++++++++++++++++++------------ + 1 file changed, 30 insertions(+), 14 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 41265631b..ca9e6dc03 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3073,6 +3073,20 @@ s390_format_RRF_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), + s390_disasm(ENC3(XMNM, GPR, GPR), xmnm_kind, m3, r1, r2); + } + ++static void ++s390_format_RRFa_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), ++ UChar m3, UChar r1, UChar r2) ++{ ++ const HChar *mnm = irgen(m3, r1, r2); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) { ++ if (m3 != 0) ++ s390_disasm(ENC4(MNM, GPR, GPR, UINT), mnm, r1, r2, m3); ++ else ++ s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2); ++ } ++} ++ + static void + s390_format_RRF_F0FF2(const HChar *(*irgen)(UChar, UChar, UChar), + UChar r3, UChar r1, UChar r2) +@@ -15112,30 +15126,32 @@ s390_irgen_FLOGR(UChar r1, UChar r2) + } + + static const HChar * +-s390_irgen_POPCNT(UChar r1, UChar r2) ++s390_irgen_POPCNT(UChar m3, UChar r1, UChar r2) + { +- Int i; ++ s390_insn_assert("popcnt", (m3 & 7) == 0); ++ ++ static const ULong masks[] = { ++ 0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F, ++ 0x00FF00FF00FF00FF, 0x0000FFFF0000FFFF, 0x00000000FFFFFFFF, ++ }; ++ Int i, n; + IRTemp val = newTemp(Ity_I64); +- IRTemp mask[3]; + + assign(val, get_gpr_dw0(r2)); +- for (i = 0; i < 3; i++) { +- mask[i] = newTemp(Ity_I64); +- } 
+- assign(mask[0], mkU64(0x5555555555555555ULL)); +- assign(mask[1], mkU64(0x3333333333333333ULL)); +- assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); +- for (i = 0; i < 3; i++) { ++ n = (m3 & 8) ? 6 : 3; ++ for (i = 0; i < n; i++) { ++ IRTemp mask = newTemp(Ity_I64); + IRTemp tmp = newTemp(Ity_I64); + ++ assign (mask, mkU64(masks[i])); + assign(tmp, + binop(Iop_Add64, + binop(Iop_And64, + mkexpr(val), +- mkexpr(mask[i])), ++ mkexpr(mask)), + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(val), mkU8(1 << i)), +- mkexpr(mask[i])))); ++ mkexpr(mask)))); + val = tmp; + } + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, val); +@@ -20235,8 +20251,8 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e0: s390_format_RRF_U0RR(s390_irgen_LOCFHR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCFHR); goto ok; +- case 0xb9e1: s390_format_RRE_RR(s390_irgen_POPCNT, RRE_r1(ovl), +- RRE_r2(ovl)); goto ok; ++ case 0xb9e1: s390_format_RRFa_U0RR(s390_irgen_POPCNT, RRF3_r3(ovl), ++ RRF3_r1(ovl), RRF3_r2(ovl)); goto ok; + case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCGR); goto ok; +-- +2.23.0 + +From 64352d57f93711ce76fd481558dcf6d65e26b19f Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Fri, 29 Jan 2021 20:13:05 +0100 +Subject: [PATCH 04/13] s390x: Misc-insn-3, MVCRL + +Add support for the "move right to left" instruction MVCRL. 
+--- + VEX/priv/guest_s390_toIR.c | 47 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index ca9e6dc03..9f7d98f8c 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3562,6 +3562,25 @@ s390_format_SS_L0RDRD(const HChar *(*irgen)(UChar, IRTemp, IRTemp), + s390_disasm(ENC3(MNM, UDLB, UDXB), mnm, d1, l, b1, d2, 0, b2); + } + ++static void ++s390_format_SSE_RDRD(const HChar *(*irgen)(IRTemp, IRTemp), ++ UChar b1, UShort d1, UChar b2, UShort d2) ++{ ++ const HChar *mnm; ++ IRTemp op1addr = newTemp(Ity_I64); ++ IRTemp op2addr = newTemp(Ity_I64); ++ ++ assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) : ++ mkU64(0))); ++ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : ++ mkU64(0))); ++ ++ mnm = irgen(op1addr, op2addr); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) ++ s390_disasm(ENC2(UDXB, UDXB), mnm, d1, 0, b1, d2, 0, b2); ++} ++ + static void + s390_format_SIL_RDI(const HChar *(*irgen)(UShort i2, IRTemp op1addr), + UChar b1, UShort d1, UShort i2) +@@ -13667,6 +13686,31 @@ s390_irgen_MVCIN(UChar length, IRTemp start1, IRTemp start2) + return "mvcin"; + } + ++static const HChar * ++s390_irgen_MVCRL(IRTemp op1addr, IRTemp op2addr) ++{ ++ IRTemp counter = newTemp(Ity_I64); ++ IRTemp offset = newTemp(Ity_I64); ++ ++ assign(counter, get_counter_dw0()); ++ /* offset = length - 1 - counter, where length-1 is specified in r0 */ ++ assign(offset, ++ binop(Iop_Sub64, ++ unop(Iop_16Uto64, ++ binop(Iop_And16, get_gpr_hw3(0), mkU16(0xfff))), ++ mkexpr(counter))); ++ ++ store(binop(Iop_Add64, mkexpr(op1addr), mkexpr(offset)), ++ load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkexpr(offset)))); ++ ++ /* Check for end of field */ ++ put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); ++ iterate_if(binop(Iop_CmpNE64, mkexpr(offset), mkU64(0))); ++ put_counter_dw0(mkU64(0)); ++ ++ return "mvcrl"; ++} ++ + 
static const HChar * + s390_irgen_MVCL(UChar r1, UChar r2) + { +@@ -22217,6 +22261,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe500ULL: /* LASP */ goto unimplemented; + case 0xe501ULL: /* TPROT */ goto unimplemented; + case 0xe502ULL: /* STRAG */ goto unimplemented; ++ case 0xe50aULL: s390_format_SSE_RDRD(s390_irgen_MVCRL, ++ SS_b1(ovl), SS_d1(ovl), ++ SS_b2(ovl), SS_d2(ovl)); goto ok; + case 0xe50eULL: /* MVCSK */ goto unimplemented; + case 0xe50fULL: /* MVCDK */ goto unimplemented; + case 0xe544ULL: s390_format_SIL_RDI(s390_irgen_MVHHI, SIL_b1(ovl), +-- +2.23.0 + +From 6cc4d66cc3a999253d9a57e2b5c75aeb67f77918 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 2 Feb 2021 20:15:02 +0100 +Subject: [PATCH 05/13] s390x: Misc-insn-3, test case + +Add a test case for the new instructions in the miscellaneous instruction +extensions facility 3. +--- + .gitignore | 1 + + none/tests/s390x/Makefile.am | 3 +- + none/tests/s390x/misc3.c | 182 ++++++++++++++++++++++++++++++ + none/tests/s390x/misc3.stderr.exp | 2 + + none/tests/s390x/misc3.stdout.exp | 103 +++++++++++++++++ + none/tests/s390x/misc3.vgtest | 1 + + 6 files changed, 291 insertions(+), 1 deletion(-) + create mode 100644 none/tests/s390x/misc3.c + create mode 100644 none/tests/s390x/misc3.stderr.exp + create mode 100644 none/tests/s390x/misc3.stdout.exp + create mode 100644 none/tests/s390x/misc3.vgtest + +diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am +index a0fb92ef5..2fd45ec1e 100644 +--- a/none/tests/s390x/Makefile.am ++++ b/none/tests/s390x/Makefile.am +@@ -19,7 +19,8 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ + spechelper-ltr spechelper-or \ + spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ + spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ +- vector_float add-z14 sub-z14 mul-z14 bic ++ vector_float add-z14 sub-z14 mul-z14 bic \ ++ misc3 + + if BUILD_DFP_TESTS + INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 
dfptest dfpext dfpconv srnmt pfpo +diff --git a/none/tests/s390x/misc3.c b/none/tests/s390x/misc3.c +new file mode 100644 +index 000000000..ae6e8d4c2 +--- /dev/null ++++ b/none/tests/s390x/misc3.c +@@ -0,0 +1,182 @@ ++#include ++ ++/* -- Logical instructions -- */ ++ ++#define TEST_GENERATE(opcode,insn) \ ++ static void test_##insn(unsigned long a, unsigned long b) \ ++ { \ ++ unsigned long out = 0xdecaffee42424242; \ ++ int cc; \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ ".insn rrf,0x" #opcode "0000,%[out],%[a],%[b],0\n\t" \ ++ "ipm %[cc]\n\t" \ ++ "srl %[cc],28\n" \ ++ : [out] "+d" (out), \ ++ [cc] "=d" (cc) \ ++ : [a] "d" (a), \ ++ [b] "d" (b) \ ++ : "cc"); \ ++ \ ++ printf("\t%016lx %016lx -> %016lx cc=%d\n", \ ++ a, b, out, cc); \ ++ } ++ ++#define TEST_EXEC(opcode,insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(0, 0); \ ++ test_##insn(0, -1); \ ++ test_##insn(-1, 0); \ ++ test_##insn(-1, -1); \ ++ test_##insn(0x012345678abcdef, 0); \ ++ test_##insn(0x012345678abcdef, -1); \ ++ test_##insn(0x55555555aaaaaaaa, 0xaaaaaaaa55555555); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(b9f5,ncrk); \ ++ XTEST(b9e5,ncgrk); \ ++ XTEST(b974,nnrk); \ ++ XTEST(b964,nngrk); \ ++ XTEST(b976,nork); \ ++ XTEST(b966,nogrk); \ ++ XTEST(b977,nxrk); \ ++ XTEST(b967,nxgrk); \ ++ XTEST(b975,ocrk); \ ++ XTEST(b965,ocgrk); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_logical_insns() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_GENERATE ++#undef TEST_EXEC ++ ++ ++/* -- Full population count -- */ ++ ++static void test_popcnt(unsigned long op2) ++{ ++ unsigned long result; ++ int cc; ++ ++ __asm__(".insn rrf,0xb9e10000,%[result],%[op2],8,0\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28\n" ++ : [result]"=d" (result), ++ [cc]"=d" (cc) ++ : [op2]"d" (op2) ++ : "cc"); ++ printf("\t%016lx -> %2lu cc=%d\n", op2, result, cc); ++} ++ ++static int test_all_popcnt() ++{ ++ puts("popcnt"); ++ test_popcnt(0); ++ 
test_popcnt(1); ++ test_popcnt(0x8000000000000000); ++ test_popcnt(-1UL); ++ test_popcnt(0xff427e3800556bcd); ++ return 0; ++} ++ ++/* -- Select -- */ ++ ++#define TEST_GENERATE(opcode,insn) \ ++ static void test_##insn(unsigned long a, unsigned long b) \ ++ { \ ++ unsigned long out0 = 0x0cafebad0badcafe; \ ++ unsigned long out1 = 0x0badcafe0cafebad; \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ ".insn rrf,0x" #opcode "0000,%[out0],%[a],%[b],8\n\t" \ ++ ".insn rrf,0x" #opcode "0000,%[out1],%[a],%[b],7\n\t" \ ++ : [out0] "+d" (out0), \ ++ [out1] "+d" (out1) \ ++ : [a] "d" (a), \ ++ [b] "d" (b) \ ++ : ); \ ++ \ ++ printf("\t%016lx %016lx -> %016lx %016lx\n", \ ++ a, b, out0, out1); \ ++ } ++ ++#define TEST_EXEC(opcode,insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(-1, 0); \ ++ test_##insn(0, -1); \ ++ test_##insn(0x1234567890abcdef, 0xfedcba9876543210); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(b9f0,selr); \ ++ XTEST(b9e3,selgr); \ ++ XTEST(b9c0,selfhr); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_select() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_GENERATE ++#undef TEST_EXEC ++ ++ ++/* -- Move right to left -- */ ++ ++static void test_mvcrl(void *to, void *from, size_t len) ++{ ++ len -= 1; ++ __asm__("lgr 0,%[len]\n\t" ++ ".insn sse,0xe50a00000000,%[to],%[from]\n\t" ++ : [to] "+Q" (*(struct { char c[len]; } *) to) ++ : [from] "Q" (*(struct { char c[len]; } *) from), ++ [len] "d" (len) ++ : ); ++} ++ ++static void test_all_mvcrl() ++{ ++ static const char pattern[] = ++ "abcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; ++ char buf[4 * sizeof(pattern) - 2]; ++ ++ test_mvcrl(buf, (char *) pattern, sizeof(pattern)); ++ test_mvcrl(buf + sizeof(pattern) - 1, buf, sizeof(pattern)); ++ test_mvcrl(buf + 2 * sizeof(pattern) - 2, buf, 2 * sizeof(pattern) - 1); ++ test_mvcrl(buf + 32, buf + 10, 63); ++ test_mvcrl(buf + 2, buf + 1, 256); ++ test_mvcrl(buf + 
254, buf + 256, 2); ++ puts("mvcrl"); ++ for (int i = 0; i < 256; i += 64) { ++ printf("\t%.64s\n", buf + i); ++ } ++} ++ ++ ++int main() ++{ ++ test_all_logical_insns(); ++ test_all_popcnt(); ++ test_all_select(); ++ test_all_mvcrl(); ++ return 0; ++} +diff --git a/none/tests/s390x/misc3.stderr.exp b/none/tests/s390x/misc3.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/s390x/misc3.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/s390x/misc3.stdout.exp b/none/tests/s390x/misc3.stdout.exp +new file mode 100644 +index 000000000..caaba4960 +--- /dev/null ++++ b/none/tests/s390x/misc3.stdout.exp +@@ -0,0 +1,103 @@ ++ncrk ++ 0000000000000000 0000000000000000 -> decaffee00000000 cc=0 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffee78abcdef cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 ++ncgrk ++ 0000000000000000 0000000000000000 -> 0000000000000000 cc=0 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> 0012345678abcdef cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 ++nnrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffeeffffffff cc=1 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee87543210 cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeffffffff cc=1 
++nngrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> ffffffffffffffff cc=1 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> ffedcba987543210 cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> ffffffffffffffff cc=1 ++nork ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 ++nogrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 ++nxrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 ++nxgrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> 
ffffffffffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 ++ocrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 ++ocgrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> ffffffffffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 ++popcnt ++ 0000000000000000 -> 0 cc=0 ++ 0000000000000001 -> 1 cc=1 ++ 8000000000000000 -> 1 cc=1 ++ ffffffffffffffff -> 64 cc=1 ++ ff427e3800556bcd -> 33 cc=1 ++selr ++ ffffffffffffffff 0000000000000000 -> 0cafebadffffffff 0badcafe00000000 ++ 0000000000000000 ffffffffffffffff -> 0cafebad00000000 0badcafeffffffff ++ 1234567890abcdef fedcba9876543210 -> 0cafebad90abcdef 0badcafe76543210 ++selgr ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff 0000000000000000 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 ffffffffffffffff ++ 1234567890abcdef fedcba9876543210 -> 1234567890abcdef fedcba9876543210 ++selfhr ++ ffffffffffffffff 0000000000000000 -> ffffffff0badcafe 000000000cafebad ++ 0000000000000000 ffffffffffffffff -> 000000000badcafe ffffffff0cafebad ++ 1234567890abcdef fedcba9876543210 -> 123456780badcafe fedcba980cafebad ++mvcrl ++ 
abbcdefghijklmnopqrstuvwxyz-01234klmnopqrstuvwxyz-0123456789.ABC ++ DEFGHIJKLMNOPQRSTUVWXYZabcdefghi456789.ABCDEFGHIJKLMNOPQRSTUVWXY ++ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXY ++ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWZ +diff --git a/none/tests/s390x/misc3.vgtest b/none/tests/s390x/misc3.vgtest +new file mode 100644 +index 000000000..d051a06bd +--- /dev/null ++++ b/none/tests/s390x/misc3.vgtest +@@ -0,0 +1 @@ ++prog: misc3 +-- +2.23.0 + +From 401b51d79886362d1962dc487db45ac91462eaa0 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 7 Apr 2021 12:29:32 +0200 +Subject: [PATCH 06/13] s390x: Vec-enh-2, extend VSL, VSRA, and VSRL + +The vector-enhancements facility 2 extends the existing bitwise vector +shift instructions VSL, VSRA, and VSRL. Now they allow the shift +vector (the third operand) to contain different shift amounts for each +byte. Add support for these new forms. +--- + VEX/priv/guest_s390_toIR.c | 58 ++++++++++++++++++++++++++++++-------- + 1 file changed, 47 insertions(+), 11 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 9f7d98f8c..622d5a02e 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17983,30 +17983,66 @@ s390_irgen_VERLL(UChar v1, IRTemp op2addr, UChar v3, UChar m4) + static const HChar * + s390_irgen_VSL(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); +- +- put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); ++ ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_Shl8x16, mkexpr(a), mkexpr(b)), ++ binop(Iop_Shr8x16, ++ binop(Iop_Shr8x16, ++ binop(Iop_ShlV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsl"; + } + + 
static const HChar * + s390_irgen_VSRL(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); + +- put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_Shr8x16, mkexpr(a), mkexpr(b)), ++ binop(Iop_Shl8x16, ++ binop(Iop_Shl8x16, ++ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsrl"; + } + + static const HChar * + s390_irgen_VSRA(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); +- +- put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); ++ ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ /* Shift-right: first byte arithmetically, all others logically */ ++ IRExpr* elems_shifted = ++ binop(Iop_Sar8x16, ++ binop(Iop_Shr8x16, mkexpr(a), ++ binop(Iop_AndV128, mkexpr(b), mkV128(0x7fff))), ++ binop(Iop_AndV128, mkexpr(b), mkV128(0x8000))); ++ /* Then OR the appropriate bits from the byte to the left */ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, elems_shifted, ++ binop(Iop_Shl8x16, ++ binop(Iop_Shl8x16, ++ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsra"; + } + +-- +2.23.0 + +From 3fdf065d0bf26a02d6d93a812a6571a287379c36 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 11 Feb 2021 20:02:03 +0100 +Subject: [PATCH 07/13] s390x: Vec-enh-2, extend VCDG, VCDLG, VCGD, and VCLGD + +The vector-enhancements facility 2 extends the vector floating-point +conversion instructions VCDG, VCDLG, VCGD, and VCLGD. 
In addition to +64-bit elements, they now also handle 32-bit elements. Add support for +these new forms. +--- + VEX/priv/guest_s390_toIR.c | 36 ++++++++++++++++++++---------------- + 1 file changed, 20 insertions(+), 16 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 622d5a02e..11271a1c9 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -18794,44 +18794,48 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, + static const HChar * + s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcdg", m3 == 3); +- +- s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcdg", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_I32StoF32 : Iop_I64StoF64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ True, v1, v2, m3, m4, m5); + return "vcdg"; + } + + static const HChar * + s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcdlg", m3 == 3); +- +- s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcdlg", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_I32UtoF32 : Iop_I64UtoF64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ True, v1, v2, m3, m4, m5); + return "vcdlg"; + } + + static const HChar * + s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcgd", m3 == 3); +- +- s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcgd", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32S : Iop_F64toI64S, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ m3 == 2 ? 
Ity_I32 : Ity_I64, ++ True, v1, v2, m3, m4, m5); + return "vcgd"; + } + + static const HChar * + s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vclgd", m3 == 3); +- +- s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vclgd", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32U : Iop_F64toI64U, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ True, v1, v2, m3, m4, m5); + return "vclgd"; + } + +-- +2.23.0 + +From d195bf17388572e85474c7ded4b5bd0e4774637d Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 16 Feb 2021 16:19:31 +0100 +Subject: [PATCH 08/13] s390x: Vec-enh-2, VLBR and friends + +Add support for the new byte- and element-swapping vector load/store +instructions VLEBRH, VLEBRG, VLEBRF, VLLEBRZ, VLBRREP, VLBR, VLER, +VSTEBRH, VSTEBRG, VSTEBRF, VSTBR, and VSTER. +--- + VEX/priv/guest_s390_toIR.c | 256 +++++++++++++++++++++++++++++++++++++ + VEX/priv/host_s390_isel.c | 9 ++ + 2 files changed, 265 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 11271a1c9..f65b42705 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -19388,6 +19388,209 @@ s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) + return "selfhr"; + } + ++/* Helper function that byte-swaps each element of its V128 input operand */ ++static IRExpr * ++s390_byteswap_elements(IRExpr* v, UChar m) ++{ ++ static const ULong perm[4][2] = { ++ { 0x0100030205040706, 0x09080b0a0d0c0f0e }, /* 2-byte elements */ ++ { 0x0302010007060504, 0x0b0a09080f0e0d0c }, /* 4-byte elements */ ++ { 0x0706050403020100, 0x0f0e0d0c0b0a0908 }, /* 8-byte elements */ ++ { 0x0f0e0d0c0b0a0908, 0x0706050403020100 }, /* whole vector */ ++ }; ++ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, ++ mkU64(perm[m - 1][0]), ++ mkU64(perm[m - 1][1]))); ++} ++ ++/* Helper function that reverses the elements of its V128 
input operand */ ++static IRExpr * ++s390_reverse_elements(IRExpr* v, UChar m) ++{ ++ static const ULong perm[3][2] = { ++ { 0x0e0f0c0d0a0b0809, 0x0607040502030001 }, /* 2-byte elements */ ++ { 0x0c0d0e0f08090a0b, 0x0405060700010203 }, /* 4-byte elements */ ++ { 0x08090a0b0c0d0e0f, 0x0001020304050607 }, /* 8-byte elements */ ++ }; ++ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, ++ mkU64(perm[m - 1][0]), ++ mkU64(perm[m - 1][1]))); ++} ++ ++static const HChar * ++s390_irgen_VLBR(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlbr", m3 >= 1 && m3 <= 4); ++ put_vr_qw(v1, s390_byteswap_elements(load(Ity_V128, mkexpr(op2addr)), m3)); ++ return "vlbr"; ++} ++ ++static const HChar * ++s390_irgen_VSTBR(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstbr", m3 >= 1 && m3 <= 4); ++ store(mkexpr(op2addr), s390_byteswap_elements(get_vr_qw(v1), m3)); ++ return "vstbr"; ++} ++ ++static const HChar * ++s390_irgen_VLER(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vler", m3 >= 1 && m3 <= 3); ++ put_vr_qw(v1, s390_reverse_elements(load(Ity_V128, mkexpr(op2addr)), m3)); ++ return "vler"; ++} ++ ++static const HChar * ++s390_irgen_VSTER(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vster", m3 >= 1 && m3 <= 3); /* 2/4/8-byte elements */ ++ store(mkexpr(op2addr), s390_reverse_elements(get_vr_qw(v1), m3)); ++ return "vster"; ++} ++ ++/* Helper function that combines its two V128 operands by replacing element 'to' ++ in 'a' by byte-swapped element 'from' in 'b' */ ++static IRExpr * ++s390_insert_byteswapped(IRExpr* a, IRExpr* b, UChar m, UChar to, UChar from) ++{ ++ UInt elem_size = 1U << m; ++ UInt start = elem_size * to; ++ UInt end = start + elem_size - 1; ++ UInt offs = end + elem_size * from + 16; ++ UInt i; ++ ++ ULong permH = 0; ++ for (i = 0; i < 8; i++) { ++ permH = (permH << 8) | (i >= start && i <= end ? offs - i : i); ++ } ++ ULong permL = 0; ++ for (i = 8; i < 16; i++) { ++ permL = (permL << 8) | (i >= start && i <= end ?
offs - i : i); ++ } ++ return triop(Iop_Perm8x16x2, a, b, binop(Iop_64HLtoV128, ++ mkU64(permH), mkU64(permL))); ++} ++ ++static const HChar * ++s390_irgen_VLEBRH(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrh", m3 <= 7); ++ IRTemp op2 = newTemp(Ity_I16); ++ assign(op2, load(Ity_I16, mkexpr(op2addr))); ++ put_vr(v1, Ity_I16, m3, binop(Iop_Or16, ++ binop(Iop_Shl16, mkexpr(op2), mkU8(8)), ++ binop(Iop_Shr16, mkexpr(op2), mkU8(8)))); ++ return "vlebrh"; ++} ++ ++static const HChar * ++s390_irgen_VLEBRF(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrf", m3 <= 3); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRTemp op2 = newTemp(Ity_I64); ++ assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr)))); ++ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); ++ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 2, m3, 3)); ++ return "vlebrf"; ++} ++ ++static const HChar * ++s390_irgen_VLEBRG(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrg", m3 <= 1); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRTemp op2 = newTemp(Ity_I64); ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); ++ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 3, m3, 1)); ++ return "vlebrg"; ++} ++ ++static const HChar * ++s390_irgen_VLBRREP(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlbrrep", m3 >= 1 && m3 <= 3); ++ static const ULong perm[3] = { ++ 0x0f0e0f0e0f0e0f0e, /* 2-byte element */ ++ 0x0f0e0d0c0f0e0d0c, /* 4-byte element */ ++ 0x0f0e0d0c0b0a0908 /* 8-byte element */ ++ }; ++ IRExpr* permHL = mkU64(perm[m3 - 1]); ++ IRTemp op2 = newTemp(Ity_I64); ++ if (m3 == 3) ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ else ++ assign(op2, unop(m3 == 2 ? 
Iop_32Uto64 : Iop_16Uto64, ++ load(s390_vr_get_type(m3), mkexpr(op2addr)))); ++ put_vr_qw(v1, binop(Iop_Perm8x16, ++ binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)), ++ binop(Iop_64HLtoV128, permHL, permHL))); ++ return "vlbrrep"; ++} ++ ++static const HChar * ++s390_irgen_VLLEBRZ(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vllebrz", (m3 >= 1 && m3 <= 3) || m3 == 6); ++ static const ULong perm[6] = { ++ 0x0000000000000f0e, /* 2-byte element */ ++ 0x000000000f0e0d0c, /* 4-byte element */ ++ 0x0f0e0d0c0b0a0908, /* 8-byte element */ ++ 0, /* invalid (4) */ ++ 0, /* invalid (5) */ ++ 0x0f0e0d0c00000000, /* 4-byte element, left-aligned */ ++ }; ++ IRExpr* permH = mkU64(perm[m3 - 1]); ++ IRTemp op2 = newTemp(Ity_I64); ++ if (m3 == 3) ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ else ++ assign(op2, unop((m3 & 3) == 2 ? Iop_32Uto64 : Iop_16Uto64, ++ load(s390_vr_get_type(m3 & 3), mkexpr(op2addr)))); ++ put_vr_qw(v1, binop(Iop_Perm8x16, ++ binop(Iop_64HLtoV128, mkU64(0), mkexpr(op2)), ++ binop(Iop_64HLtoV128, permH, mkU64(0)))); ++ return "vllebrz"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRH(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrh", m3 <= 7); ++ IRTemp op1 = newTemp(Ity_I16); ++ assign(op1, get_vr(v1, Ity_I16, m3)); ++ store(mkexpr(op2addr), binop(Iop_Or16, ++ binop(Iop_Shl16, mkexpr(op1), mkU8(8)), ++ binop(Iop_Shr16, mkexpr(op1), mkU8(8)))); ++ return "vstebrh"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRF(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrf", m3 <= 3); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRExpr* b = s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 2, 3, m3); ++ store(mkexpr(op2addr), unop(Iop_V128to32, b)); ++ return "vstebrf"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrg", m3 <= 1); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRExpr* b = 
s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 3, 1, m3); ++ store(mkexpr(op2addr), unop(Iop_V128to64, b)); ++ return "vstebrg"; ++} ++ + /* New insns are added here. + If an insn is contingent on a facility being installed also + check whether the list of supported facilities in function +@@ -21003,6 +21206,59 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + RXY_x2(ovl), RXY_b2(ovl), + RXY_dl2(ovl), + RXY_dh2(ovl)); goto ok; ++ case 0xe60000000001ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRH, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000002ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRG, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000003ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRF, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000004ULL: s390_format_VRX_VRRDM(s390_irgen_VLLEBRZ, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000005ULL: s390_format_VRX_VRRDM(s390_irgen_VLBRREP, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000006ULL: s390_format_VRX_VRRDM(s390_irgen_VLBR, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000007ULL: s390_format_VRX_VRRDM(s390_irgen_VLER, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000009ULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRH, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000aULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRG, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000bULL: 
s390_format_VRX_VRRDM(s390_irgen_VSTEBRF, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000eULL: s390_format_VRX_VRRDM(s390_irgen_VSTBR, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000fULL: s390_format_VRX_VRRDM(s390_irgen_VSTER, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; + case 0xe60000000034ULL: /* VPKZ */ goto unimplemented; + case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl), + VSI_b2(ovl), VSI_d2(ovl), +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index ee20c6711..06e195957 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4189,6 +4189,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + return dst; + } + ++ case Iop_Perm8x16: ++ size = 16; ++ reg1 = s390_isel_vec_expr(env, arg1); ++ reg2 = s390_isel_vec_expr(env, arg2); ++ ++ addInstr(env, s390_insn_vec_triop(size, S390_VEC_PERM, ++ dst, reg1, reg1, reg2)); ++ return dst; ++ + case Iop_CmpEQ8x16: + size = 1; + vec_binop = S390_VEC_COMPARE_EQUAL; +-- +2.23.0 + +From f7447f4c73b2d0fb4eb3827c3709f378f6c9c656 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 23 Feb 2021 19:10:37 +0100 +Subject: [PATCH 09/13] s390x: Vec-enh-2, VSLD and VSRD + +Support the new "vector shift left/right double by bit" instructions VSLD +and VSRD. 
+--- + VEX/priv/guest_s390_toIR.c | 50 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 50 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index f65b42705..aa429d085 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -18228,6 +18228,48 @@ s390_irgen_VSLDB(UChar v1, UChar v2, UChar v3, UChar i4) + return "vsldb"; + } + ++static const HChar * ++s390_irgen_VSLD(UChar v1, UChar v2, UChar v3, UChar i4) ++{ ++ s390_insn_assert("vsld", i4 <= 7); ++ ++ if (i4 == 0) { ++ /* Just copy v2. */ ++ put_vr_qw(v1, get_vr_qw(v2)); ++ } else { ++ /* Concatenate v2's tail with v3's head. */ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(i4)), ++ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(128 - i4)) ++ ) ++ ); ++ } ++ ++ return "vsld"; ++} ++ ++static const HChar * ++s390_irgen_VSRD(UChar v1, UChar v2, UChar v3, UChar i4) ++{ ++ s390_insn_assert("vsrd", i4 <= 7); ++ ++ if (i4 == 0) { ++ /* Just copy v3. */ ++ put_vr_qw(v1, get_vr_qw(v3)); ++ } else { ++ /* Concatenate v2's tail with v3's head. 
*/ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(128 - i4)), ++ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(i4)) ++ ) ++ ); ++ } ++ ++ return "vsrd"; ++} ++ + static const HChar * + s390_irgen_VMO(UChar v1, UChar v2, UChar v3, UChar m4) + { +@@ -21541,6 +21583,14 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_rxb(ovl)); goto ok; ++ case 0xe70000000086ULL: s390_format_VRId_VVVI(s390_irgen_VSLD, VRId_v1(ovl), ++ VRId_v2(ovl), VRId_v3(ovl), ++ VRId_i4(ovl), ++ VRId_rxb(ovl)); goto ok; ++ case 0xe70000000087ULL: s390_format_VRId_VVVI(s390_irgen_VSRD, VRId_v1(ovl), ++ VRId_v2(ovl), VRId_v3(ovl), ++ VRId_i4(ovl), ++ VRId_rxb(ovl)); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl), + VRRd_v2(ovl), VRRd_v3(ovl), + VRRd_v4(ovl), VRRd_m5(ovl), +-- +2.23.0 + +From 388082bca7146f8a15814798dbfe570af2aab2a9 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 10 Mar 2021 19:22:51 +0100 +Subject: [PATCH 10/13] s390x: Vec-enh-2, VSTRS + +Support the new "vector string search" instruction VSTRS. The +implementation is a full emulation and follows a similar approach as for +the other vector string instructions. 
+--- + VEX/priv/guest_s390_toIR.c | 104 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 104 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index aa429d085..46a867475 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17601,6 +17601,105 @@ s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + return "vstrc"; + } + ++static const HChar * ++s390_irgen_VSTRS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) ++{ ++ s390_insn_assert("vstrs", m5 <= 2 && m6 == (m6 & 2)); ++ ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRTemp op4 = newTemp(Ity_I8); ++ IRTemp op2clean = newTemp(Ity_V128); ++ IRTemp op3mask = newTemp(Ity_V128); ++ IRTemp result = newTemp(Ity_V128); ++ IRTemp ccnomatch = newTemp(Ity_I64); ++ IRExpr* tmp; ++ IRExpr* match = NULL; ++ UChar elem_bits = 8 << m5; ++ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, ++ Iop_CmpEQ16x8, Iop_CmpEQ32x4); ++ ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); ++ assign(op4, get_vr_b7(v4)); ++ ++ tmp = unop(Iop_Dup32x4, ++ unop(Iop_1Sto32, binop(Iop_CmpNE8, mkexpr(op4), mkU8(16)))); ++ tmp = binop(Iop_ShrV128, tmp, binop(Iop_Shl8, mkexpr(op4), mkU8(3))); ++ ++ if (s390_vr_is_zs_set(m6)) { ++ IRTemp op2eos = newTemp(Ity_V128); ++ IRExpr* t; ++ t = binop(cmpeq_op, mkexpr(op2), mkV128(0)); ++ for (UChar i = m5; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, t); ++ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), ++ mkU8(8 << i))); ++ } ++ assign(op2eos, t); ++ assign(op2clean, binop(Iop_AndV128, mkexpr(op2), ++ unop(Iop_NotV128, mkexpr(op2eos)))); ++ assign(ccnomatch, binop(Iop_And64, mkU64(1), ++ unop(Iop_V128to64, mkexpr(op2eos)))); ++ ++ t = binop(cmpeq_op, mkexpr(op3), mkV128(0)); ++ for (UChar i = m5; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, t); ++ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), ++ mkU8(8 << i))); 
++ } ++ tmp = binop(Iop_OrV128, tmp, t); ++ } else { ++ assign(op2clean, mkexpr(op2)); ++ } ++ assign(op3mask, unop(Iop_NotV128, tmp)); ++ ++ for (UChar shift = 0; shift < 128; shift += elem_bits) { ++ IRTemp s = newTemp(Ity_V128); ++ tmp = unop(Iop_NotV128, ++ binop(cmpeq_op, mkexpr(op2clean), ++ binop(Iop_ShrV128, mkexpr(op3), mkU8(shift)))); ++ assign(s, binop(Iop_CmpEQ64x2, mkV128(0), ++ binop(Iop_AndV128, mkexpr(op3mask), ++ binop(Iop_ShlV128, tmp, mkU8(shift))))); ++ tmp = mkexpr(s); ++ if (shift < 64) { ++ tmp = binop(Iop_AndV128, tmp, ++ unop(Iop_Dup16x8, binop(Iop_GetElem16x8, tmp, mkU8(4)))); ++ } ++ tmp = binop(Iop_AndV128, tmp, ++ unop(Iop_Dup16x8, mkU16(1 << (15 - shift / 8)))); ++ if (shift) ++ match = binop(Iop_OrV128, mkexpr(mktemp(Ity_V128, match)), tmp); ++ else ++ match = tmp; ++ } ++ assign(result, unop(Iop_ClzNat64, ++ binop(Iop_Or64, ++ unop(Iop_V128HIto64, match), ++ mkU64((1ULL << 48) - 1)))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); ++ ++ /* Set condition code. ++ 0: no match, no string terminator in op2 ++ 1: no match, string terminator found ++ 2: full match ++ 3: partial match */ ++ IRTemp cc = newTemp(Ity_I64); ++ tmp = binop(Iop_CmpLE64U, ++ binop(Iop_Add64, mkexpr(result), unop(Iop_8Uto64, mkexpr(op4))), ++ mkU64(16)); ++ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(result), mkU64(16)), ++ s390_vr_is_zs_set(m6) ?
mkexpr(ccnomatch) : mkU64(0), ++ mkite(tmp, mkU64(2), mkU64(3)))); ++ s390_cc_set(cc); ++ ++ dis_res->hint = Dis_HintVerbose; ++ return "vstrs"; ++} ++ + static const HChar * + s390_irgen_VNC(UChar v1, UChar v2, UChar v3) + { +@@ -21596,6 +21695,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + VRRd_v4(ovl), VRRd_m5(ovl), + VRRd_m6(ovl), + VRRd_rxb(ovl)); goto ok; ++ case 0xe7000000008bULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRS, VRRd_v1(ovl), ++ VRRd_v2(ovl), VRRd_v3(ovl), ++ VRRd_v4(ovl), VRRd_m5(ovl), ++ VRRd_m6(ovl), ++ VRRd_rxb(ovl)); goto ok; + case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +-- +2.23.0 + +From 8a079b405467fa127c6c311d7ae3c649e76106c6 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 16 Feb 2021 17:52:09 +0100 +Subject: [PATCH 11/13] s390x: Mark arch13 features as supported + +Make the STFLE instruction report the miscellaneous-instruction-extensions +facility 3 and the vector-enhancements facility 2 as supported. Indicate +support for the latter in the HWCAP vector as well. +--- + VEX/priv/guest_s390_helpers.c | 9 +++------ + coregrind/m_initimg/initimg-linux.c | 3 ++- + include/vki/vki-s390x-linux.h | 1 + + 3 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 1e04f601a..804b92a29 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -356,9 +356,7 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + | s390_stfle_range(51, 55) + /* 56: unassigned */ + /* 57: MSA5, not supported */ +- | s390_stfle_range(58, 60) +- /* 61: miscellaneous-instruction 3, not supported */ +- | s390_stfle_range(62, 63)), ++ | s390_stfle_range(58, 63)), + + /* === 64 .. 
127 === */ + (s390_stfle_range(64, 72) +@@ -384,11 +382,10 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + /* 143: unassigned */ + | s390_stfle_range(144, 145) + /* 146: MSA8, not supported */ +- | s390_stfle_range(147, 147) +- /* 148: vector-enhancements 2, not supported */ +- | s390_stfle_range(149, 149) ++ | s390_stfle_range(147, 149) + /* 150: unassigned */ + /* 151: DEFLATE-conversion, not supported */ ++ /* 152: vector packed decimal enhancement, not supported */ + /* 153: unassigned */ + /* 154: unassigned */ + /* 155: MSA9, not supported */ +diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c +index fc1a32ecf..37d005168 100644 +--- a/coregrind/m_initimg/initimg-linux.c ++++ b/coregrind/m_initimg/initimg-linux.c +@@ -703,7 +703,8 @@ Addr setup_client_stack( void* init_sp, + itself, is not supported by Valgrind. */ + auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1) + | VKI_HWCAP_S390_VXRS +- | VKI_HWCAP_S390_VXRS_EXT); ++ | VKI_HWCAP_S390_VXRS_EXT ++ | VKI_HWCAP_S390_VXRS_EXT2); + } + # elif defined(VGP_arm64_linux) + { +diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h +index 4ab2d3334..71b363029 100644 +--- a/include/vki/vki-s390x-linux.h ++++ b/include/vki/vki-s390x-linux.h +@@ -807,6 +807,7 @@ typedef vki_s390_regs vki_elf_gregset_t; + #define VKI_HWCAP_S390_TE 1024 + #define VKI_HWCAP_S390_VXRS 2048 + #define VKI_HWCAP_S390_VXRS_EXT 8192 ++#define VKI_HWCAP_S390_VXRS_EXT2 32768 + + + //---------------------------------------------------------------------- +-- +2.23.0 + +From 1461d9b8d0b12e55b648fbf50c5dcee30785afa2 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Mon, 17 May 2021 15:34:15 +0200 +Subject: [PATCH 12/13] s390x: Vec-enh-2, test cases + +Add test cases for verifying the new/enhanced instructions in the +vector-enhancements facility 2. For "vector string search" VSTRS add a +memcheck test case. 
+--- + .gitignore | 2 + + memcheck/tests/s390x/Makefile.am | 3 +- + memcheck/tests/s390x/vstrs.c | 68 ++++++ + memcheck/tests/s390x/vstrs.stderr.exp | 16 ++ + memcheck/tests/s390x/vstrs.stdout.exp | 0 + memcheck/tests/s390x/vstrs.vgtest | 2 + + none/tests/s390x/Makefile.am | 3 +- + none/tests/s390x/vec2.c | 314 ++++++++++++++++++++++++++ + none/tests/s390x/vec2.stderr.exp | 2 + + none/tests/s390x/vec2.stdout.exp | 168 ++++++++++++++ + none/tests/s390x/vec2.vgtest | 2 + + tests/s390x_features.c | 4 + + 12 files changed, 582 insertions(+), 2 deletions(-) + create mode 100644 memcheck/tests/s390x/vstrs.c + create mode 100644 memcheck/tests/s390x/vstrs.stderr.exp + create mode 100644 memcheck/tests/s390x/vstrs.stdout.exp + create mode 100644 memcheck/tests/s390x/vstrs.vgtest + create mode 100644 none/tests/s390x/vec2.c + create mode 100644 none/tests/s390x/vec2.stderr.exp + create mode 100644 none/tests/s390x/vec2.stdout.exp + create mode 100644 none/tests/s390x/vec2.vgtest + +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index d183841ef..668fd9933 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr ++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr vstrs + + check_PROGRAMS = $(INSN_TESTS) + +@@ -18,3 +18,4 @@ AM_CCASFLAGS += @FLAG_M64@ + vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 + vfae_CFLAGS = $(AM_CFLAGS) -march=z13 + vistr_CFLAGS = $(AM_CFLAGS) -march=z13 ++vstrs_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/memcheck/tests/s390x/vstrs.c b/memcheck/tests/s390x/vstrs.c +new file mode 100644 +index 000000000..3354c2e53 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.c +@@ -0,0 +1,68 @@ ++#include ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char 
*hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static char_v to_char_vec(const char *str) ++{ ++ char buf[17]; ++ char_v v; ++ ++ for (int i = 0; i < sizeof(buf); i++) { ++ char ch = str[i]; ++ if (ch == '\0') ++ break; ++ else if (ch == '$') ++ buf[i] = '\0'; ++ else if (ch != '~') ++ buf[i] = ch; ++ } ++ v = *(char_v *) buf; ++ return v; ++} ++ ++static void test_vstrs_char(const char *haystack, const char *needle, ++ int expect_res, int expect_cc) ++{ ++ int cc; ++ char_v v2val = to_char_vec(haystack); ++ char_v v3val = to_char_vec(needle); ++ ++ register unsigned long VECTOR v4 __asm__("v4") = { strlen(needle), 0 }; ++ register char_v v1 __asm__("v1"); ++ register char_v v2 __asm__("v2") = v2val; ++ register char_v v3 __asm__("v3") = v3val; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ ".short 0xe712,0x3020,0x408b\n\t" /* vstrs %v1,%v2,%v3,%v4,0,2 */ ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : "=v" (v1), [cc] "=d" (cc) ++ : "v" (v2), "v" (v3), "v" (v4) ++ : "cc"); ++ ++ tmp = hex_digit[v1[7] & 0x1f]; ++ if (expect_res >= 0 && v1[7] != expect_res) ++ printf("result %u != %d\n", v1[7], expect_res); ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ test_vstrs_char("haystack$needle", "needle$haystack", 16, 1); ++ test_vstrs_char("haystack, needle", "needle, haystack", 10, 3); ++ test_vstrs_char("ABCDEFGH", "DEFGHI", -1, -1); ++ test_vstrs_char("match in UNDEF", "UN", 9, 2); ++ test_vstrs_char("after ~ UNDEF", "DEF", -1, -1); ++ test_vstrs_char("", "", 0, 2); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vstrs.stderr.exp b/memcheck/tests/s390x/vstrs.stderr.exp +new file mode 100644 +index 000000000..c5c3ef705 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.stderr.exp +@@ -0,0 +1,16 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:50) ++ by 0x........: main (vstrs.c:63) ++ ++Use of uninitialised value of size 8 ++ at 
0x........: test_vstrs_char (vstrs.c:54) ++ by 0x........: main (vstrs.c:63) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:50) ++ by 0x........: main (vstrs.c:65) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:54) ++ by 0x........: main (vstrs.c:65) ++ +diff --git a/memcheck/tests/s390x/vstrs.stdout.exp b/memcheck/tests/s390x/vstrs.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vstrs.vgtest b/memcheck/tests/s390x/vstrs.vgtest +new file mode 100644 +index 000000000..fd2a29873 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.vgtest +@@ -0,0 +1,2 @@ ++prog: vstrs ++vgopts: -q +diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am +index 2fd45ec1e..ca38db935 100644 +--- a/none/tests/s390x/Makefile.am ++++ b/none/tests/s390x/Makefile.am +@@ -20,7 +20,7 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ + spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ + spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ + vector_float add-z14 sub-z14 mul-z14 bic \ +- misc3 ++ misc3 vec2 + + if BUILD_DFP_TESTS + INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo +@@ -74,3 +74,4 @@ lsc2_CFLAGS = -march=z13 -DS390_TESTS_NOCOLOR + vector_string_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=5 + vector_integer_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 + vector_float_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 ++vec2_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/none/tests/s390x/vec2.c b/none/tests/s390x/vec2.c +new file mode 100644 +index 000000000..73b04dee4 +--- /dev/null ++++ b/none/tests/s390x/vec2.c +@@ -0,0 +1,314 @@ ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef unsigned long VECTOR ulong_v; ++typedef float VECTOR float_v; ++ ++static const ulong_v vec_a = { 0x0123456789abcdef, 0xfedcba9876543210 }; ++static const 
ulong_v vec_b = { 0xfedcba9876543210, 0x0123456789abcdef }; ++static const ulong_v vec_c = { 0x8040201008040201, 0x7fbfdfeff7fbfdfe }; ++static const ulong_v vec_one = { -1, -1 }; ++static const ulong_v vec_ini = { 0x0112233445566778, 0x899aabbccddeeff0 }; ++ ++static const float_v vec_fa = { 16777215., -16777215., 42.5, 10000. }; ++static const float_v vec_fb = { 4., 3., 2., 1. }; ++ ++/* -- Vector shift -- */ ++ ++#define TEST_GENERATE(insn) \ ++ static void test_##insn(ulong_v a, ulong_v b) \ ++ { \ ++ ulong_v out; \ ++ __asm__( \ ++ #insn " %[out],%[a],%[b]" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a), \ ++ [b] "v" (b) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define TEST_EXEC(insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(vec_a, vec_b); \ ++ test_##insn(vec_b, vec_a); \ ++ test_##insn(vec_c, vec_a); \ ++ test_##insn(vec_one, vec_b); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(vsl); \ ++ XTEST(vsrl); \ ++ XTEST(vsra); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_single_bitshifts() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector load element-/byte-swapped -- */ ++ ++#define TEST_EXEC(opc1,opc2,insn,m3) \ ++ do { \ ++ puts(#insn " " #m3); \ ++ test_##insn##_##m3(vec_a); \ ++ test_##insn##_##m3(vec_b); \ ++ } while (0) ++ ++#define TEST_GENERATE(opc1,opc2,insn,m3) \ ++ static void test_##insn##_##m3(ulong_v a) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[out],%[a]," #m3 \ ++ : [out] "+v" (out) \ ++ : [a] "R" (a) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define INSNS \ ++ XTEST(e6,01, vlebrh, 0); \ ++ XTEST(e6,01, vlebrh, 7); \ ++ XTEST(e6,01, vlebrh, 2); \ ++ XTEST(e6,03, vlebrf, 0); \ ++ XTEST(e6,03, vlebrf, 3); \ ++ XTEST(e6,03, vlebrf, 1); \ ++ XTEST(e6,02, vlebrg, 0); \ ++ XTEST(e6,02, vlebrg, 1); \ ++ XTEST(e6,04, vllebrz, 
1); \ ++ XTEST(e6,04, vllebrz, 2); \ ++ XTEST(e6,04, vllebrz, 3); \ ++ XTEST(e6,04, vllebrz, 6); \ ++ XTEST(e6,05, vlbrrep, 1); \ ++ XTEST(e6,05, vlbrrep, 2); \ ++ XTEST(e6,05, vlbrrep, 3); \ ++ XTEST(e6,06, vlbr, 1); \ ++ XTEST(e6,06, vlbr, 2); \ ++ XTEST(e6,06, vlbr, 3); \ ++ XTEST(e6,06, vlbr, 4); \ ++ XTEST(e6,07, vler, 1); \ ++ XTEST(e6,07, vler, 2); \ ++ XTEST(e6,07, vler, 3); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_swapped_loads() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_GENERATE ++ ++/* -- Vector store element-/byte-swapped -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m3) \ ++ static void test_##insn##_##m3(ulong_v a) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[a],%[out]," #m3 \ ++ : [out] "+R" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define INSNS \ ++ XTEST(e6,09, vstebrh, 0); \ ++ XTEST(e6,09, vstebrh, 7); \ ++ XTEST(e6,09, vstebrh, 2); \ ++ XTEST(e6,0b, vstebrf, 0); \ ++ XTEST(e6,0b, vstebrf, 3); \ ++ XTEST(e6,0b, vstebrf, 1); \ ++ XTEST(e6,0a, vstebrg, 0); \ ++ XTEST(e6,0a, vstebrg, 1); \ ++ XTEST(e6,0e, vstbr, 1); \ ++ XTEST(e6,0e, vstbr, 2); \ ++ XTEST(e6,0e, vstbr, 3); \ ++ XTEST(e6,0e, vstbr, 4); \ ++ XTEST(e6,0f, vster, 1); \ ++ XTEST(e6,0f, vster, 2); \ ++ XTEST(e6,0f, vster, 3); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_swapped_stores() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector shift double by bit -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,i4) \ ++ static void test_##insn##_##i4(ulong_v a, ulong_v b) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],%[b],0," #i4 ",0" \ ++ : [out] "+v" (out) \ ++ : [a] "v" (a), \ ++ [b] "v" (b) \ ++ : ); \ ++ 
printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,i4) \ ++ do { \ ++ puts(#insn " " #i4); \ ++ test_##insn##_##i4(vec_a, vec_one); \ ++ test_##insn##_##i4(vec_b, vec_a); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,86,vsld,0); \ ++ XTEST(e7,86,vsld,7); \ ++ XTEST(e7,86,vsld,4); \ ++ XTEST(e7,87,vsrd,0); \ ++ XTEST(e7,87,vsrd,7); \ ++ XTEST(e7,87,vsrd,4); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_double_bitshifts() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector integer -> FP conversions -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m4) \ ++ static void test_##insn##_##m4(ulong_v a) \ ++ { \ ++ float_v out; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],0,2," #m4 ",0" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ if (m4 & 8) \ ++ printf("\t%a - - -\n", out[0]); \ ++ else \ ++ printf("\t%a %a %a %a\n", out[0], out[1], out[2], out[3]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,m4) \ ++ do { \ ++ puts(#insn " " #m4); \ ++ test_##insn##_##m4(vec_a); \ ++ test_##insn##_##m4(vec_c); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,c1,vcfpl,0); \ ++ XTEST(e7,c1,vcfpl,8); \ ++ XTEST(e7,c3,vcfps,0); \ ++ XTEST(e7,c3,vcfps,8); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_int_fp_conversions() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector FP -> integer conversions -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m4) \ ++ static void test_##insn##_##m4(float_v a) \ ++ { \ ++ unsigned int VECTOR out; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],0,2," #m4 ",0" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ if (m4 & 8) \ ++ printf("\t%08x - - -\n", out[0]); \ ++ else \ ++ printf("\t%08x %08x %08x 
%08x\n", \ ++ out[0], out[1], out[2], out[3]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,m4) \ ++ do { \ ++ puts(#insn " " #m4); \ ++ test_##insn##_##m4(vec_fa); \ ++ test_##insn##_##m4(vec_fb); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,c0,vclfp,0); \ ++ XTEST(e7,c0,vclfp,8); \ ++ XTEST(e7,c2,vcsfp,0); \ ++ XTEST(e7,c2,vcsfp,8); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_fp_int_conversions() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++ ++int main() ++{ ++ test_all_single_bitshifts(); ++ test_all_swapped_loads(); ++ test_all_swapped_stores(); ++ test_all_double_bitshifts(); ++ test_all_int_fp_conversions(); ++ test_all_fp_int_conversions(); ++ return 0; ++} +diff --git a/none/tests/s390x/vec2.stderr.exp b/none/tests/s390x/vec2.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/s390x/vec2.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/s390x/vec2.stdout.exp b/none/tests/s390x/vec2.stdout.exp +new file mode 100644 +index 000000000..b32cbe1bc +--- /dev/null ++++ b/none/tests/s390x/vec2.stdout.exp +@@ -0,0 +1,168 @@ ++vsl ++ 483415676abc37ef fde5533beca14200 ++ fde5533beca14200 483415676abc37ef ++ 00010204102040bf effd7feffebff7fe ++ ffffffffffffffff ffffffffffffff80 ++vsrl ++ 0012d1679e9af3ef ffdbe5753bcaa164 ++ 7fdbe5753bcaa164 4012d1679e9af3ef ++ 4008014004002004 05fbf7efbf7ffffe ++ 03ffffffffffffff ffffffffffffffff ++vsra ++ 0012d1679e9af3ef ffdbe5753bcaa164 ++ ffdbe5753bcaa164 4012d1679e9af3ef ++ c008014004002004 05fbf7efbf7ffffe ++ ffffffffffffffff ffffffffffffffff ++vlebrh 0 ++ 2301233445566778 899aabbccddeeff0 ++ dcfe233445566778 899aabbccddeeff0 ++vlebrh 7 ++ 0112233445566778 899aabbccdde2301 ++ 0112233445566778 899aabbccddedcfe ++vlebrh 2 ++ 0112233423016778 899aabbccddeeff0 ++ 01122334dcfe6778 899aabbccddeeff0 ++vlebrf 0 ++ 6745230145566778 899aabbccddeeff0 ++ 98badcfe45566778 
899aabbccddeeff0 ++vlebrf 3 ++ 0112233445566778 899aabbc67452301 ++ 0112233445566778 899aabbc98badcfe ++vlebrf 1 ++ 0112233467452301 899aabbccddeeff0 ++ 0112233498badcfe 899aabbccddeeff0 ++vlebrg 0 ++ efcdab8967452301 899aabbccddeeff0 ++ 1032547698badcfe 899aabbccddeeff0 ++vlebrg 1 ++ 0112233445566778 efcdab8967452301 ++ 0112233445566778 1032547698badcfe ++vllebrz 1 ++ 0000000000002301 0000000000000000 ++ 000000000000dcfe 0000000000000000 ++vllebrz 2 ++ 0000000067452301 0000000000000000 ++ 0000000098badcfe 0000000000000000 ++vllebrz 3 ++ efcdab8967452301 0000000000000000 ++ 1032547698badcfe 0000000000000000 ++vllebrz 6 ++ 6745230100000000 0000000000000000 ++ 98badcfe00000000 0000000000000000 ++vlbrrep 1 ++ 2301230123012301 2301230123012301 ++ dcfedcfedcfedcfe dcfedcfedcfedcfe ++vlbrrep 2 ++ 6745230167452301 6745230167452301 ++ 98badcfe98badcfe 98badcfe98badcfe ++vlbrrep 3 ++ efcdab8967452301 efcdab8967452301 ++ 1032547698badcfe 1032547698badcfe ++vlbr 1 ++ 23016745ab89efcd dcfe98ba54761032 ++ dcfe98ba54761032 23016745ab89efcd ++vlbr 2 ++ 67452301efcdab89 98badcfe10325476 ++ 98badcfe10325476 67452301efcdab89 ++vlbr 3 ++ efcdab8967452301 1032547698badcfe ++ 1032547698badcfe efcdab8967452301 ++vlbr 4 ++ 1032547698badcfe efcdab8967452301 ++ efcdab8967452301 1032547698badcfe ++vler 1 ++ 32107654ba98fedc cdef89ab45670123 ++ cdef89ab45670123 32107654ba98fedc ++vler 2 ++ 76543210fedcba98 89abcdef01234567 ++ 89abcdef01234567 76543210fedcba98 ++vler 3 ++ fedcba9876543210 0123456789abcdef ++ 0123456789abcdef fedcba9876543210 ++vstebrh 0 ++ 2301233445566778 899aabbccddeeff0 ++ dcfe233445566778 899aabbccddeeff0 ++vstebrh 7 ++ 1032233445566778 899aabbccddeeff0 ++ efcd233445566778 899aabbccddeeff0 ++vstebrh 2 ++ ab89233445566778 899aabbccddeeff0 ++ 5476233445566778 899aabbccddeeff0 ++vstebrf 0 ++ 6745230145566778 899aabbccddeeff0 ++ 98badcfe45566778 899aabbccddeeff0 ++vstebrf 3 ++ 1032547645566778 899aabbccddeeff0 ++ efcdab8945566778 899aabbccddeeff0 ++vstebrf 1 ++ 
efcdab8945566778 899aabbccddeeff0 ++ 1032547645566778 899aabbccddeeff0 ++vstebrg 0 ++ efcdab8967452301 899aabbccddeeff0 ++ 1032547698badcfe 899aabbccddeeff0 ++vstebrg 1 ++ 1032547698badcfe 899aabbccddeeff0 ++ efcdab8967452301 899aabbccddeeff0 ++vstbr 1 ++ 23016745ab89efcd dcfe98ba54761032 ++ dcfe98ba54761032 23016745ab89efcd ++vstbr 2 ++ 67452301efcdab89 98badcfe10325476 ++ 98badcfe10325476 67452301efcdab89 ++vstbr 3 ++ efcdab8967452301 1032547698badcfe ++ 1032547698badcfe efcdab8967452301 ++vstbr 4 ++ 1032547698badcfe efcdab8967452301 ++ efcdab8967452301 1032547698badcfe ++vster 1 ++ 32107654ba98fedc cdef89ab45670123 ++ cdef89ab45670123 32107654ba98fedc ++vster 2 ++ 76543210fedcba98 89abcdef01234567 ++ 89abcdef01234567 76543210fedcba98 ++vster 3 ++ fedcba9876543210 0123456789abcdef ++ 0123456789abcdef fedcba9876543210 ++vsld 0 ++ 0123456789abcdef fedcba9876543210 ++ fedcba9876543210 0123456789abcdef ++vsld 7 ++ 91a2b3c4d5e6f7ff 6e5d4c3b2a19087f ++ 6e5d4c3b2a190800 91a2b3c4d5e6f780 ++vsld 4 ++ 123456789abcdeff edcba9876543210f ++ edcba98765432100 123456789abcdef0 ++vsrd 0 ++ ffffffffffffffff ffffffffffffffff ++ 0123456789abcdef fedcba9876543210 ++vsrd 7 ++ 21ffffffffffffff ffffffffffffffff ++ de02468acf13579b dffdb97530eca864 ++vsrd 4 ++ 0fffffffffffffff ffffffffffffffff ++ f0123456789abcde ffedcba987654321 ++vcfpl 0 ++ 0x1.234568p+24 0x1.13579cp+31 0x1.fdb976p+31 0x1.d950c8p+30 ++ 0x1.00804p+31 0x1.00804p+27 0x1.feff8p+30 0x1.eff7fcp+31 ++vcfpl 8 ++ 0x1.234568p+24 - - - ++ 0x1.00804p+31 - - - ++vcfps 0 ++ 0x1.234568p+24 -0x1.d950c8p+30 -0x1.234568p+24 0x1.d950c8p+30 ++ -0x1.feff8p+30 0x1.00804p+27 0x1.feff8p+30 -0x1.00804p+27 ++vcfps 8 ++ 0x1.234568p+24 - - - ++ -0x1.feff8p+30 - - - ++vclfp 0 ++ 00ffffff 00000000 0000002a 00002710 ++ 00000004 00000003 00000002 00000001 ++vclfp 8 ++ 00ffffff - - - ++ 00000004 - - - ++vcsfp 0 ++ 00ffffff ff000001 0000002a 00002710 ++ 00000004 00000003 00000002 00000001 ++vcsfp 8 ++ 00ffffff - - - ++ 00000004 - - - +diff --git 
a/none/tests/s390x/vec2.vgtest b/none/tests/s390x/vec2.vgtest +new file mode 100644 +index 000000000..45e942e64 +--- /dev/null ++++ b/none/tests/s390x/vec2.vgtest +@@ -0,0 +1,2 @@ ++prog: vec2 ++prereq: test -e vec2 && ../../../tests/s390x_features s390x-vx +diff --git a/tests/s390x_features.c b/tests/s390x_features.c +index 25b98f3a3..e7939c463 100644 +--- a/tests/s390x_features.c ++++ b/tests/s390x_features.c +@@ -270,6 +270,10 @@ static int go(char *feature, char *cpu) + match = facilities[0] & FAC_BIT(57); /* message security assist 5 facility */ + } else if (strcmp(feature, "s390x-mi2") == 0 ) { + match = facilities[0] & FAC_BIT(58); ++ } else if (strcmp(feature, "s390x-mi3") == 0 ) { ++ match = facilities[0] & FAC_BIT(61); ++ } else if (strcmp(feature, "s390x-vx2") == 0 ) { ++ match = facilities[2] & FAC_BIT(20); + } else { + return 2; // Unrecognised feature. + } +-- +2.23.0 + +From d9364bc90ee894c43ee742840f806571edc08ab3 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 18 May 2021 19:59:32 +0200 +Subject: [PATCH 13/13] s390x: Wrap up misc-insn-3 and vec-enh-2 support + +Wrap up support for the miscellaneous-instruction-extensions facility 3 +and the vector-enhancements facility 2: Add 'case' statements for the +remaining unhandled arch13 instructions to 'guest_s390_toIR.c', document +the new support in 's390-opcodes.csv', adjust 's390-check-opcodes.pl', and +announce the new feature in 'NEWS'. +--- + NEWS | 5 ++ + VEX/priv/guest_s390_toIR.c | 5 +- + auxprogs/s390-check-opcodes.pl | 22 ++++++++- + docs/internals/s390-opcodes.csv | 81 +++++++++++++++++++++++++++++++-- + 4 files changed, 108 insertions(+), 5 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 46a867475..1bd18f760 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2020 ++ Copyright IBM Corp. 
2010-2021 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -20503,6 +20503,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + RRE_r2(ovl)); goto ok; + case 0xb931: s390_format_RRE_RR(s390_irgen_CLGFR, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; ++ case 0xb938: /* SORTL */ goto unimplemented; ++ case 0xb939: /* DFLTCC */ goto unimplemented; ++ case 0xb93a: /* KDSA */ goto unimplemented; + case 0xb93c: s390_format_RRE_RR(s390_irgen_PPNO, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; + case 0xb93e: /* KIMD */ goto unimplemented; +-- +2.23.0 + diff --git a/valgrind.spec b/valgrind.spec index 08878c7..5870d1a 100644 --- a/valgrind.spec +++ b/valgrind.spec @@ -3,7 +3,7 @@ Summary: Tool for finding memory management bugs in programs Name: %{?scl_prefix}valgrind Version: 3.17.0 -Release: 5%{?dist} +Release: 6%{?dist} Epoch: 1 License: GPLv2+ URL: http://www.valgrind.org/ @@ -120,6 +120,39 @@ Patch10: valgrind-3.17.0-clone-parent-res.patch # workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100217 Patch11: valgrind-3.17.0-s390x-tests-z14-workaround.patch +# commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 +# Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg +# commit 18ddcc47c951427efd3b790ba2481159b9bd1598 +# s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 +# commit 5db3f929c43bf46f4707178706cfe90f43acdd19 +# s390x: Add convenience function mkV128() +# commit e78bd78d3043729033b426218ab8c6dae9c51e96 +# Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE +# commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 +# Bug 434296 - s390x: Rework IR conversion of VFENE +# commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 +# Bug 434296 - s390x: Rework IR conversion of VISTR +# commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 +# Bug 434296 - s390x: Add memcheck test cases for vector string insns +# commit a0bb049ace14ab52d386bb1d49a399f39eec4986 +# s390x: Improve 
handling of amodes without base register +# commit fd935e238d907d9c523a311ba795077d95ad6912 +# s390x: Rework insn "v-vdup" and add "v-vrep" +# commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 +# s390x: Add support for emitting "vector or with complement" +# commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 +# s390x: Fix/optimize Iop_64HLtoV128 +# commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a +# s390x: Add missing stdout.exp for vector string memcheck test +Patch12: valgrind-3.17.0-s390-prep.patch + +# KDE#432387 - s390x: z15 instructions support +Patch13: valgrind-3.17.0-s390-z15.patch + +# commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb +# s390x: Don't emit "vector or with complement" on z13 +Patch14: valgrind-3.17.0-s390-z13-vec-fix.patch + BuildRequires: make BuildRequires: glibc-devel @@ -268,6 +301,11 @@ Valgrind User Manual for details. %patch9 -p1 %patch11 -p1 +%patch12 -p1 +touch memcheck/tests/s390x/vistr.stdout.exp +%patch13 -p1 +%patch14 -p1 + %build # LTO triggers undefined symbols in valgrind. Valgrind has a --enable-lto # configure time option, but that doesn't seem to help. @@ -492,6 +530,11 @@ fi %endif %changelog +* Thu Jun 24 2021 Mark Wielaard - 3.17.0-6 +- Add valgrind-3.17.0-s390-prep.patch +- Add valgrind-3.17.0-s390-z15.patch +- Add valgrind-3.17.0-s390-z13-vec-fix.patch + * Mon Jun 7 2021 Mark Wielaard - 3.17.0-5 - Disable run_full_regtest.