2284 lines
77 KiB
Diff
2284 lines
77 KiB
Diff
|
commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Wed Apr 28 18:52:30 2021 +0200
|
||
|
|
||
|
Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
|
||
|
|
||
|
The fix for bug 429864 - "s390x: C++ atomic test_and_set yields
|
||
|
false-positive memcheck diagnostics" changes the memcheck behavior at
|
||
|
various compare-and-swap instructions. The comparison between the old and
|
||
|
expected value now always yields a defined result, even if the input
|
||
|
values are (partially) undefined. However, some existing test cases
|
||
|
explicitly verify that memcheck complains about the use of uninitialised
|
||
|
values here. These test cases are no longer valid. Remove them.
|
||
|
|
||
|
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
|
||
|
index 67ae8c293..e4e69eb38 100644
|
||
|
--- a/memcheck/tests/s390x/Makefile.am
|
||
|
+++ b/memcheck/tests/s390x/Makefile.am
|
||
|
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
|
||
|
|
||
|
dist_noinst_SCRIPTS = filter_stderr
|
||
|
|
||
|
-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe
|
||
|
+INSN_TESTS = cdsg cu21 cu42 ltgjhe
|
||
|
|
||
|
check_PROGRAMS = $(INSN_TESTS)
|
||
|
|
||
|
@@ -14,7 +14,3 @@ EXTRA_DIST = \
|
||
|
AM_CFLAGS += @FLAG_M64@
|
||
|
AM_CXXFLAGS += @FLAG_M64@
|
||
|
AM_CCASFLAGS += @FLAG_M64@
|
||
|
-
|
||
|
-cs_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
|
||
|
-csg_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
|
||
|
-cds_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
|
||
|
diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c
|
||
|
deleted file mode 100644
|
||
|
index ec5c533e0..000000000
|
||
|
--- a/memcheck/tests/s390x/cds.c
|
||
|
+++ /dev/null
|
||
|
@@ -1,82 +0,0 @@
|
||
|
-#include <stdint.h>
|
||
|
-#include <stdio.h>
|
||
|
-
|
||
|
-typedef struct {
|
||
|
- uint64_t high;
|
||
|
- uint64_t low;
|
||
|
-} quad_word;
|
||
|
-
|
||
|
-void
|
||
|
-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init)
|
||
|
-{
|
||
|
- int cc; // unused
|
||
|
- quad_word op1 = op1_init;
|
||
|
- uint64_t op2 = op2_init;
|
||
|
- quad_word op3 = op3_init;
|
||
|
-
|
||
|
- __asm__ volatile (
|
||
|
- "lmg %%r0,%%r1,%1\n\t"
|
||
|
- "lmg %%r2,%%r3,%3\n\t"
|
||
|
- "cds %%r0,%%r2,%2\n\t" // cds 1st,3rd,2nd
|
||
|
- "stmg %%r0,%%r1,%1\n" // store r0,r1 to op1
|
||
|
- "stmg %%r2,%%r3,%3\n" // store r2,r3 to op3
|
||
|
- : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3)
|
||
|
- :
|
||
|
- : "r0", "r1", "r2", "r3", "cc");
|
||
|
-
|
||
|
-}
|
||
|
-
|
||
|
-// Return a quad-word that only bits low[32:63] are undefined
|
||
|
-quad_word
|
||
|
-make_undefined(void)
|
||
|
-{
|
||
|
- quad_word val;
|
||
|
-
|
||
|
- val.high = 0;
|
||
|
- val.low |= 0xFFFFFFFF00000000ull;
|
||
|
-
|
||
|
- return val;
|
||
|
-}
|
||
|
-
|
||
|
-void op1_undefined(void)
|
||
|
-{
|
||
|
- quad_word op1, op3;
|
||
|
- uint64_t op2;
|
||
|
-
|
||
|
- // op1 undefined
|
||
|
- op1 = make_undefined();
|
||
|
- op2 = 42;
|
||
|
- op3.high = op3.low = 0xdeadbeefdeadbabeull;
|
||
|
- test(op1, op2, op3); // complaint
|
||
|
-}
|
||
|
-
|
||
|
-void op2_undefined(void)
|
||
|
-{
|
||
|
- quad_word op1, op3;
|
||
|
- uint64_t op2;
|
||
|
-
|
||
|
- op1.high = op1.low = 42;
|
||
|
- // op2 undefined
|
||
|
- op3.high = op3.low = 0xdeadbeefdeadbabeull;
|
||
|
- test(op1, op2, op3); // complaint
|
||
|
-}
|
||
|
-
|
||
|
-void op3_undefined(void)
|
||
|
-{
|
||
|
- quad_word op1, op3;
|
||
|
- uint64_t op2;
|
||
|
-
|
||
|
- op1.high = op1.low = 42;
|
||
|
- op2 = 100;
|
||
|
- op3 = make_undefined();
|
||
|
- test(op1, op2, op3); // no complaint; op3 is just copied around
|
||
|
-}
|
||
|
-
|
||
|
-int main ()
|
||
|
-{
|
||
|
- op1_undefined();
|
||
|
- op2_undefined();
|
||
|
- op3_undefined();
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp
|
||
|
deleted file mode 100644
|
||
|
index e72de94c8..000000000
|
||
|
--- a/memcheck/tests/s390x/cds.stderr.exp
|
||
|
+++ /dev/null
|
||
|
@@ -1,10 +0,0 @@
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (cds.c:17)
|
||
|
- by 0x........: op1_undefined (cds.c:50)
|
||
|
- by 0x........: main (cds.c:77)
|
||
|
-
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (cds.c:17)
|
||
|
- by 0x........: op2_undefined (cds.c:61)
|
||
|
- by 0x........: main (cds.c:78)
|
||
|
-
|
||
|
diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp
|
||
|
deleted file mode 100644
|
||
|
index e69de29bb..000000000
|
||
|
diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest
|
||
|
deleted file mode 100644
|
||
|
index 5195887e2..000000000
|
||
|
--- a/memcheck/tests/s390x/cds.vgtest
|
||
|
+++ /dev/null
|
||
|
@@ -1,2 +0,0 @@
|
||
|
-prog: cds
|
||
|
-vgopts: -q
|
||
|
diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c
|
||
|
deleted file mode 100644
|
||
|
index 9a298cef9..000000000
|
||
|
--- a/memcheck/tests/s390x/cs.c
|
||
|
+++ /dev/null
|
||
|
@@ -1,32 +0,0 @@
|
||
|
-#include <stdint.h>
|
||
|
-#include <stdio.h>
|
||
|
-#include <string.h>
|
||
|
-
|
||
|
-void
|
||
|
-test(int32_t op1_init, int32_t op2_init, int32_t op3_init)
|
||
|
-{
|
||
|
- register int32_t op1 asm("8") = op1_init;
|
||
|
- register int32_t op3 asm("9") = op3_init;
|
||
|
-
|
||
|
- int32_t op2 = op2_init;
|
||
|
- int cc = 1;
|
||
|
-
|
||
|
- __asm__ volatile (
|
||
|
- "cs 8,9,%1\n\t"
|
||
|
- "ipm %0\n\t"
|
||
|
- "srl %0,28\n\t"
|
||
|
- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
|
||
|
- :
|
||
|
- : "cc");
|
||
|
-}
|
||
|
-
|
||
|
-int main ()
|
||
|
-{
|
||
|
- int op1, op2, op3;
|
||
|
-
|
||
|
- test(op1, 0x10000000, 0x12345678); // complaint
|
||
|
- test(0x10000000, op2, 0x12345678); // complaint
|
||
|
- test(0x10000000, 0x01000000, op3); // no complaint
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp
|
||
|
deleted file mode 100644
|
||
|
index e45dc99cd..000000000
|
||
|
--- a/memcheck/tests/s390x/cs.stderr.exp
|
||
|
+++ /dev/null
|
||
|
@@ -1,8 +0,0 @@
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (cs.c:14)
|
||
|
- by 0x........: main (cs.c:27)
|
||
|
-
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (cs.c:14)
|
||
|
- by 0x........: main (cs.c:28)
|
||
|
-
|
||
|
diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp
|
||
|
deleted file mode 100644
|
||
|
index e69de29bb..000000000
|
||
|
diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest
|
||
|
deleted file mode 100644
|
||
|
index 323cce80c..000000000
|
||
|
--- a/memcheck/tests/s390x/cs.vgtest
|
||
|
+++ /dev/null
|
||
|
@@ -1,2 +0,0 @@
|
||
|
-prog: cs
|
||
|
-vgopts: -q
|
||
|
diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c
|
||
|
deleted file mode 100644
|
||
|
index 7f9d8c88e..000000000
|
||
|
--- a/memcheck/tests/s390x/csg.c
|
||
|
+++ /dev/null
|
||
|
@@ -1,32 +0,0 @@
|
||
|
-#include <stdint.h>
|
||
|
-#include <stdio.h>
|
||
|
-#include <string.h>
|
||
|
-
|
||
|
-void
|
||
|
-test(int64_t op1_init, int64_t op2_init, int64_t op3_init)
|
||
|
-{
|
||
|
- register int64_t op1 asm("8") = op1_init;
|
||
|
- register int64_t op3 asm("9") = op3_init;
|
||
|
-
|
||
|
- int64_t op2 = op2_init;
|
||
|
- int cc = 1;
|
||
|
-
|
||
|
- __asm__ volatile (
|
||
|
- "csg 8,9,%1\n\t"
|
||
|
- "ipm %0\n\t"
|
||
|
- "srl %0,28\n\t"
|
||
|
- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
|
||
|
- :
|
||
|
- : "cc");
|
||
|
-}
|
||
|
-
|
||
|
-int main ()
|
||
|
-{
|
||
|
- int64_t op1, op2, op3;
|
||
|
-
|
||
|
- test(op1, 0x1000000000000000ull, 0x1234567887654321ull); // complaint
|
||
|
- test(0x1000000000000000ull, op2, 0x1234567887654321ull); // complaint
|
||
|
- test(0x1000000000000000ull, 0x1000000000000000ull, op3); // no complaint
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp
|
||
|
deleted file mode 100644
|
||
|
index fda2021ce..000000000
|
||
|
--- a/memcheck/tests/s390x/csg.stderr.exp
|
||
|
+++ /dev/null
|
||
|
@@ -1,8 +0,0 @@
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (csg.c:14)
|
||
|
- by 0x........: main (csg.c:27)
|
||
|
-
|
||
|
-Conditional jump or move depends on uninitialised value(s)
|
||
|
- at 0x........: test (csg.c:14)
|
||
|
- by 0x........: main (csg.c:28)
|
||
|
-
|
||
|
diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp
|
||
|
deleted file mode 100644
|
||
|
index e69de29bb..000000000
|
||
|
diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest
|
||
|
deleted file mode 100644
|
||
|
index 6de75c1d6..000000000
|
||
|
--- a/memcheck/tests/s390x/csg.vgtest
|
||
|
+++ /dev/null
|
||
|
@@ -1,2 +0,0 @@
|
||
|
-prog: csg
|
||
|
-vgopts: -q
|
||
|
|
||
|
commit 18ddcc47c951427efd3b790ba2481159b9bd1598
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Wed Apr 7 16:48:29 2021 +0200
|
||
|
|
||
|
s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
|
||
|
|
||
|
Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction
|
||
|
selector. Handle them exactly like the "inexpensive" variants Iop_CmpNE32
|
||
|
and Iop_CmpNE64.
|
||
|
|
||
|
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
|
||
|
index 2000ec224..5f79280c0 100644
|
||
|
--- a/VEX/priv/host_s390_isel.c
|
||
|
+++ b/VEX/priv/host_s390_isel.c
|
||
|
@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond)
|
||
|
|
||
|
case Iop_CmpNE32:
|
||
|
case Iop_CmpNE64:
|
||
|
+ case Iop_ExpCmpNE32:
|
||
|
+ case Iop_ExpCmpNE64:
|
||
|
case Iop_CasCmpNE32:
|
||
|
case Iop_CasCmpNE64:
|
||
|
result = S390_CC_NE;
|
||
|
|
||
|
commit 5db3f929c43bf46f4707178706cfe90f43acdd19
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Wed Apr 7 12:30:20 2021 +0200
|
||
|
|
||
|
s390x: Add convenience function mkV128()
|
||
|
|
||
|
Provide mkV128() as a short-hand notation for creating a vector constant from
|
||
|
a bit pattern, similar to other such functions like mkU64().
|
||
|
|
||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||
|
index 339377007..7d54cb551 100644
|
||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||
|
@@ -376,6 +376,13 @@ mkU64(ULong value)
|
||
|
return IRExpr_Const(IRConst_U64(value));
|
||
|
}
|
||
|
|
||
|
+/* Create an expression node for a 128-bit vector constant */
|
||
|
+static __inline__ IRExpr *
|
||
|
+mkV128(UShort value)
|
||
|
+{
|
||
|
+ return IRExpr_Const(IRConst_V128(value));
|
||
|
+}
|
||
|
+
|
||
|
/* Create an expression node for a 32-bit floating point constant
|
||
|
whose value is given by a bit pattern. */
|
||
|
static __inline__ IRExpr *
|
||
|
@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4)
|
||
|
static const HChar *
|
||
|
s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused)))
|
||
|
{
|
||
|
- put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2)));
|
||
|
+ put_vr_qw(v1, mkV128(i2));
|
||
|
|
||
|
return "vgbm";
|
||
|
}
|
||
|
@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4)
|
||
|
switch(type) {
|
||
|
case Ity_I8:
|
||
|
sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2)));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0001000100010001));
|
||
|
+ mask = mkV128(0b0001000100010001);
|
||
|
break;
|
||
|
case Ity_I16:
|
||
|
sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0011001100110011));
|
||
|
+ mask = mkV128(0b0011001100110011);
|
||
|
break;
|
||
|
default:
|
||
|
vpanic("s390_irgen_VSUM: invalid type ");
|
||
|
@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4)
|
||
|
switch(type) {
|
||
|
case Ity_I16:
|
||
|
sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2)));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0000001100000011));
|
||
|
+ mask = mkV128(0b0000001100000011);
|
||
|
break;
|
||
|
case Ity_I32:
|
||
|
sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0000111100001111));
|
||
|
+ mask = mkV128(0b0000111100001111);
|
||
|
break;
|
||
|
default:
|
||
|
vpanic("s390_irgen_VSUMG: invalid type ");
|
||
|
@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4)
|
||
|
switch(type) {
|
||
|
case Ity_I32:
|
||
|
sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2)));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0000000000001111));
|
||
|
+ mask = mkV128(0b0000000000001111);
|
||
|
break;
|
||
|
case Ity_I64:
|
||
|
sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2));
|
||
|
- mask = IRExpr_Const(IRConst_V128(0b0000000011111111));
|
||
|
+ mask = mkV128(0b0000000011111111);
|
||
|
break;
|
||
|
default:
|
||
|
vpanic("s390_irgen_VSUMQ: invalid type ");
|
||
|
@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
|
||
|
assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
|
||
|
}
|
||
|
put_vr_qw(v1, mkite(mkexpr(cond),
|
||
|
- IRExpr_Const(IRConst_V128(0xffff)),
|
||
|
- IRExpr_Const(IRConst_V128(0))));
|
||
|
+ mkV128(0xffff),
|
||
|
+ mkV128(0)));
|
||
|
if (s390_vr_is_cs_set(m6)) {
|
||
|
IRTemp cc = newTemp(Ity_I64);
|
||
|
assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
|
||
|
|
||
|
commit e78bd78d3043729033b426218ab8c6dae9c51e96
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Thu Mar 18 18:01:10 2021 +0100
|
||
|
|
||
|
Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
|
||
|
|
||
|
The z/Architecture instructions "vector string range compare" (VSTRC),
|
||
|
"vector find any element equal" (VFAE), and "vector find element
|
||
|
equal" (VFEE) are each implemented with a dirty helper that executes the
|
||
|
instruction. Unfortunately this approach leads to memcheck false
|
||
|
positives, because these instructions may yield a defined result even if
|
||
|
parts of the input vectors are undefined. There are multiple ways this
|
||
|
can happen: Wherever the flags in the fourth operand to VSTRC indicate
|
||
|
"match always" or "match never", the corresponding elements in the third
|
||
|
operand don't affect the result. The same is true for the elements
|
||
|
following the first zero-element in the second operand if the ZS flag is
|
||
|
set, or for the elements following the first matching element, if any.
|
||
|
|
||
|
Re-implement the instructions without dirty helpers and transform into
|
||
|
lengthy IR instead.
|
||
|
|
||
|
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
|
||
|
index 905429015..49b6cd5dd 100644
|
||
|
--- a/VEX/priv/guest_s390_defs.h
|
||
|
+++ b/VEX/priv/guest_s390_defs.h
|
||
|
@@ -265,11 +265,8 @@ typedef enum {
|
||
|
S390_VEC_OP_INVALID = 0,
|
||
|
S390_VEC_OP_VPKS,
|
||
|
S390_VEC_OP_VPKLS,
|
||
|
- S390_VEC_OP_VFAE,
|
||
|
- S390_VEC_OP_VFEE,
|
||
|
S390_VEC_OP_VFENE,
|
||
|
S390_VEC_OP_VISTR,
|
||
|
- S390_VEC_OP_VSTRC,
|
||
|
S390_VEC_OP_VCEQ,
|
||
|
S390_VEC_OP_VTM,
|
||
|
S390_VEC_OP_VGFM,
|
||
|
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
|
||
|
index b71b621ae..63d2e8ce5 100644
|
||
|
--- a/VEX/priv/guest_s390_helpers.c
|
||
|
+++ b/VEX/priv/guest_s390_helpers.c
|
||
|
@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
{0x00, 0x00}, /* invalid */
|
||
|
[S390_VEC_OP_VPKS] = {0xe7, 0x97},
|
||
|
[S390_VEC_OP_VPKLS] = {0xe7, 0x95},
|
||
|
- [S390_VEC_OP_VFAE] = {0xe7, 0x82},
|
||
|
- [S390_VEC_OP_VFEE] = {0xe7, 0x80},
|
||
|
[S390_VEC_OP_VFENE] = {0xe7, 0x81},
|
||
|
[S390_VEC_OP_VISTR] = {0xe7, 0x5c},
|
||
|
- [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
|
||
|
[S390_VEC_OP_VCEQ] = {0xe7, 0xf8},
|
||
|
[S390_VEC_OP_VTM] = {0xe7, 0xd8},
|
||
|
[S390_VEC_OP_VGFM] = {0xe7, 0xb4},
|
||
|
@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
|
||
|
case S390_VEC_OP_VPKS:
|
||
|
case S390_VEC_OP_VPKLS:
|
||
|
- case S390_VEC_OP_VFAE:
|
||
|
- case S390_VEC_OP_VFEE:
|
||
|
case S390_VEC_OP_VFENE:
|
||
|
case S390_VEC_OP_VCEQ:
|
||
|
case S390_VEC_OP_VGFM:
|
||
|
@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
the_insn.VRR.m5 = d->m5;
|
||
|
break;
|
||
|
|
||
|
- case S390_VEC_OP_VSTRC:
|
||
|
case S390_VEC_OP_VGFMA:
|
||
|
case S390_VEC_OP_VMAH:
|
||
|
case S390_VEC_OP_VMALH:
|
||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||
|
index 7d54cb551..26a947813 100644
|
||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||
|
@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2)
|
||
|
return "ppno";
|
||
|
}
|
||
|
|
||
|
-static const HChar *
|
||
|
-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
-{
|
||
|
- IRDirty* d;
|
||
|
- IRTemp cc = newTemp(Ity_I64);
|
||
|
+enum s390_VStrX {
|
||
|
+ s390_VStrX_VSTRC,
|
||
|
+ s390_VStrX_VFAE,
|
||
|
+ s390_VStrX_VFEE
|
||
|
+};
|
||
|
|
||
|
- /* Check for specification exception */
|
||
|
- vassert(m4 < 3);
|
||
|
+#define S390_VEC_OP3(m, op0, op1, op2) \
|
||
|
+ (m) == 0 ? op0 : (m) == 1 ? op1 : (m) == 2 ? op2 : Iop_INVALID;
|
||
|
|
||
|
- s390x_vec_op_details_t details = { .serialized = 0ULL };
|
||
|
- details.op = S390_VEC_OP_VFAE;
|
||
|
- details.v1 = v1;
|
||
|
- details.v2 = v2;
|
||
|
- details.v3 = v3;
|
||
|
- details.m4 = m4;
|
||
|
- details.m5 = m5;
|
||
|
-
|
||
|
- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
|
||
|
- &s390x_dirtyhelper_vec_op,
|
||
|
- mkIRExprVec_2(IRExpr_GSPTR(),
|
||
|
- mkU64(details.serialized)));
|
||
|
+/* Helper function for transforming VSTRC, VFAE, or VFEE. These instructions
|
||
|
+ share much of the same logic. */
|
||
|
+static void
|
||
|
+s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
|
||
|
+ UChar m6, enum s390_VStrX which_insn)
|
||
|
+{
|
||
|
+ IRTemp op2 = newTemp(Ity_V128);
|
||
|
+ IRTemp op3 = newTemp(Ity_V128);
|
||
|
+ IRExpr* tmp;
|
||
|
+ IRExpr* match = NULL;
|
||
|
+ UChar bitwidth = 8 << m5;
|
||
|
+ UChar n_elem = 16 >> m5;
|
||
|
+ IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4);
|
||
|
+ IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4);
|
||
|
+ IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4);
|
||
|
+ IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4);
|
||
|
+ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
|
||
|
+ Iop_CmpEQ16x8, Iop_CmpEQ32x4);
|
||
|
+ IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16,
|
||
|
+ Iop_CmpGT16Ux8, Iop_CmpGT32Ux4);
|
||
|
+ IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16,
|
||
|
+ Iop_GetElem16x8, Iop_GetElem32x4);
|
||
|
+
|
||
|
+ assign(op2, get_vr_qw(v2));
|
||
|
+ assign(op3, get_vr_qw(v3));
|
||
|
+
|
||
|
+ switch (which_insn) {
|
||
|
+
|
||
|
+ case s390_VStrX_VSTRC: {
|
||
|
+ IRTemp op4 = newTemp(Ity_V128);
|
||
|
+ assign(op4, get_vr_qw(v4));
|
||
|
+
|
||
|
+ /* Mask off insignificant range boundaries from op3, i.e., all those for
|
||
|
+ which the corresponding field in op4 has all or no bits set ("match
|
||
|
+ always" / "match never"). */
|
||
|
+ IRTemp bounds = newTemp(Ity_V128);
|
||
|
+ tmp = unop(Iop_NotV128,
|
||
|
+ binop(cmpeq_op, mkV128(0),
|
||
|
+ binop(sar_op,
|
||
|
+ binop(sub_op,
|
||
|
+ binop(sar_op, mkexpr(op4),
|
||
|
+ mkU8(bitwidth - 3)),
|
||
|
+ mkV128(-1)),
|
||
|
+ mkU8(1))));
|
||
|
+ assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp));
|
||
|
+
|
||
|
+ IRTemp flags_eq = newTemp(Ity_V128);
|
||
|
+ IRTemp flags_lt = newTemp(Ity_V128);
|
||
|
+ IRTemp flags_gt = newTemp(Ity_V128);
|
||
|
+ assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1)));
|
||
|
+ assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)),
|
||
|
+ mkU8(bitwidth - 1)));
|
||
|
+ assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)),
|
||
|
+ mkU8(bitwidth - 1)));
|
||
|
+
|
||
|
+ for (UChar idx = 0; idx < n_elem; idx += 2) {
|
||
|
+ /* Match according to the even/odd pairs in op3 and op4 at idx */
|
||
|
+ IRTemp part[2];
|
||
|
+
|
||
|
+ for (UChar j = 0; j < 2; j++) {
|
||
|
+ IRTemp a = newTemp(Ity_V128);
|
||
|
+ assign(a, unop(dup_op,
|
||
|
+ binop(getelem_op, mkexpr(bounds), mkU8(idx + j))));
|
||
|
+
|
||
|
+ IRExpr* m[] = {
|
||
|
+ binop(cmpeq_op, mkexpr(op2), mkexpr(a)),
|
||
|
+ binop(cmpgt_op, mkexpr(a), mkexpr(op2)),
|
||
|
+ binop(cmpgt_op, mkexpr(op2), mkexpr(a))
|
||
|
+ };
|
||
|
+ IRExpr* f[] = {
|
||
|
+ unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))),
|
||
|
+ unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))),
|
||
|
+ unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j)))
|
||
|
+ };
|
||
|
+ part[j] = newTemp(Ity_V128);
|
||
|
+ assign(part[j], binop(Iop_OrV128,
|
||
|
+ binop(Iop_OrV128,
|
||
|
+ binop(Iop_AndV128, f[0], m[0]),
|
||
|
+ binop(Iop_AndV128, f[1], m[1])),
|
||
|
+ binop(Iop_AndV128, f[2], m[2])));
|
||
|
+ }
|
||
|
+ tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1]));
|
||
|
+ match = idx == 0 ? tmp : binop(Iop_OrV128, match, tmp);
|
||
|
+ }
|
||
|
+ break;
|
||
|
+ }
|
||
|
|
||
|
- d->nFxState = 3;
|
||
|
- vex_bzero(&d->fxState, sizeof(d->fxState));
|
||
|
- d->fxState[0].fx = Ifx_Read;
|
||
|
- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
|
||
|
- d->fxState[0].size = sizeof(V128);
|
||
|
- d->fxState[1].fx = Ifx_Read;
|
||
|
- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
|
||
|
- d->fxState[1].size = sizeof(V128);
|
||
|
- d->fxState[2].fx = Ifx_Write;
|
||
|
- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
|
||
|
- d->fxState[2].size = sizeof(V128);
|
||
|
+ case s390_VStrX_VFAE:
|
||
|
+ for (UChar idx = 0; idx < n_elem; idx++) {
|
||
|
+ IRTemp a = newTemp(Ity_V128);
|
||
|
+ assign(a, binop(cmpeq_op, mkexpr(op2),
|
||
|
+ unop(dup_op,
|
||
|
+ binop(getelem_op, mkexpr(op3), mkU8(idx)))));
|
||
|
+ match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a));
|
||
|
+ }
|
||
|
+ break;
|
||
|
|
||
|
- stmt(IRStmt_Dirty(d));
|
||
|
+ case s390_VStrX_VFEE:
|
||
|
+ match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3));
|
||
|
+ break;
|
||
|
|
||
|
- if (s390_vr_is_cs_set(m5)) {
|
||
|
- s390_cc_set(cc);
|
||
|
+ default:
|
||
|
+ vpanic("s390_irgen_VStrX: unknown insn");
|
||
|
}
|
||
|
|
||
|
- return "vfae";
|
||
|
-}
|
||
|
+ /* Invert first intermediate result if requested */
|
||
|
+ if (m6 & 8)
|
||
|
+ match = unop(Iop_NotV128, match);
|
||
|
|
||
|
-static const HChar *
|
||
|
-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
-{
|
||
|
- IRDirty* d;
|
||
|
- IRTemp cc = newTemp(Ity_I64);
|
||
|
+ IRTemp inter1 = newTemp(Ity_V128);
|
||
|
+ IRTemp inter2 = newTemp(Ity_V128);
|
||
|
+ IRTemp accu = newTemp(Ity_V128);
|
||
|
+ assign(inter1, match);
|
||
|
|
||
|
- /* Check for specification exception */
|
||
|
- vassert(m4 < 3);
|
||
|
- vassert((m5 & 0b1100) == 0);
|
||
|
+ /* Determine second intermediate and accumulated result */
|
||
|
+ if (s390_vr_is_zs_set(m6)) {
|
||
|
+ assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0)));
|
||
|
+ assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2)));
|
||
|
+ } else {
|
||
|
+ assign(inter2, mkV128(0));
|
||
|
+ assign(accu, mkexpr(inter1));
|
||
|
+ }
|
||
|
|
||
|
- s390x_vec_op_details_t details = { .serialized = 0ULL };
|
||
|
- details.op = S390_VEC_OP_VFEE;
|
||
|
- details.v1 = v1;
|
||
|
- details.v2 = v2;
|
||
|
- details.v3 = v3;
|
||
|
- details.m4 = m4;
|
||
|
- details.m5 = m5;
|
||
|
+ IRTemp accu0 = newTemp(Ity_I64);
|
||
|
+ IRTemp is_match0 = newTemp(Ity_I1);
|
||
|
+ IRTemp mismatch_bits = newTemp(Ity_I64);
|
||
|
|
||
|
- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
|
||
|
- &s390x_dirtyhelper_vec_op,
|
||
|
- mkIRExprVec_2(IRExpr_GSPTR(),
|
||
|
- mkU64(details.serialized)));
|
||
|
+ assign(accu0, unop(Iop_V128HIto64, mkexpr(accu)));
|
||
|
+ assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0)));
|
||
|
+ assign(mismatch_bits, unop(Iop_ClzNat64,
|
||
|
+ mkite(mkexpr(is_match0), mkexpr(accu0),
|
||
|
+ unop(Iop_V128to64, mkexpr(accu)))));
|
||
|
|
||
|
- d->nFxState = 3;
|
||
|
- vex_bzero(&d->fxState, sizeof(d->fxState));
|
||
|
- d->fxState[0].fx = Ifx_Read;
|
||
|
- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
|
||
|
- d->fxState[0].size = sizeof(V128);
|
||
|
- d->fxState[1].fx = Ifx_Read;
|
||
|
- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
|
||
|
- d->fxState[1].size = sizeof(V128);
|
||
|
- d->fxState[2].fx = Ifx_Write;
|
||
|
- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
|
||
|
- d->fxState[2].size = sizeof(V128);
|
||
|
+ if (m6 & 4) {
|
||
|
+ put_vr_qw(v1, mkexpr(inter1));
|
||
|
+ } else {
|
||
|
+ /* Determine byte position of first match */
|
||
|
+ tmp = binop(Iop_Add64,
|
||
|
+ binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)),
|
||
|
+ mkite(mkexpr(is_match0), mkU64(0), mkU64(8)));
|
||
|
+ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
|
||
|
+ }
|
||
|
|
||
|
- stmt(IRStmt_Dirty(d));
|
||
|
+ if (s390_vr_is_cs_set(m6)) {
|
||
|
+ /* Set condition code depending on...
|
||
|
+ zero found
|
||
|
+ n y
|
||
|
+ +------
|
||
|
+ match n | 3 0
|
||
|
+ found y | 1 2 */
|
||
|
|
||
|
- if (s390_vr_is_cs_set(m5)) {
|
||
|
+ IRTemp cc = newTemp(Ity_I64);
|
||
|
+
|
||
|
+ tmp = binop(Iop_Shr64,
|
||
|
+ mkite(mkexpr(is_match0),
|
||
|
+ unop(Iop_V128HIto64, mkexpr(inter1)),
|
||
|
+ unop(Iop_V128to64, mkexpr(inter1))),
|
||
|
+ unop(Iop_64to8,
|
||
|
+ binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits))));
|
||
|
+ tmp = binop(Iop_Shl64, tmp, mkU8(1));
|
||
|
+ if (s390_vr_is_zs_set(m6)) {
|
||
|
+ tmp = binop(Iop_Xor64, tmp,
|
||
|
+ mkite(binop(Iop_ExpCmpNE64, mkU64(0),
|
||
|
+ binop(Iop_Or64,
|
||
|
+ unop(Iop_V128HIto64, mkexpr(inter2)),
|
||
|
+ unop(Iop_V128to64, mkexpr(inter2)))),
|
||
|
+ mkU64(0),
|
||
|
+ mkU64(3)));
|
||
|
+ } else {
|
||
|
+ tmp = binop(Iop_Xor64, tmp, mkU64(3));
|
||
|
+ }
|
||
|
+ assign(cc, tmp);
|
||
|
s390_cc_set(cc);
|
||
|
}
|
||
|
+ dis_res->hint = Dis_HintVerbose;
|
||
|
+}
|
||
|
|
||
|
+static const HChar *
|
||
|
+s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
+{
|
||
|
+ s390_insn_assert("vfae", m4 <= 2);
|
||
|
+ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE);
|
||
|
+ return "vfae";
|
||
|
+}
|
||
|
+
|
||
|
+static const HChar *
|
||
|
+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
+{
|
||
|
+ s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3));
|
||
|
+ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE);
|
||
|
return "vfee";
|
||
|
}
|
||
|
|
||
|
@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
|
||
|
static const HChar *
|
||
|
s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
|
||
|
{
|
||
|
- IRDirty* d;
|
||
|
- IRTemp cc = newTemp(Ity_I64);
|
||
|
-
|
||
|
- /* Check for specification exception */
|
||
|
- vassert(m5 < 3);
|
||
|
-
|
||
|
- s390x_vec_op_details_t details = { .serialized = 0ULL };
|
||
|
- details.op = S390_VEC_OP_VSTRC;
|
||
|
- details.v1 = v1;
|
||
|
- details.v2 = v2;
|
||
|
- details.v3 = v3;
|
||
|
- details.v4 = v4;
|
||
|
- details.m4 = m5;
|
||
|
- details.m5 = m6;
|
||
|
-
|
||
|
- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
|
||
|
- &s390x_dirtyhelper_vec_op,
|
||
|
- mkIRExprVec_2(IRExpr_GSPTR(),
|
||
|
- mkU64(details.serialized)));
|
||
|
-
|
||
|
- d->nFxState = 4;
|
||
|
- vex_bzero(&d->fxState, sizeof(d->fxState));
|
||
|
- d->fxState[0].fx = Ifx_Read;
|
||
|
- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
|
||
|
- d->fxState[0].size = sizeof(V128);
|
||
|
- d->fxState[1].fx = Ifx_Read;
|
||
|
- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
|
||
|
- d->fxState[1].size = sizeof(V128);
|
||
|
- d->fxState[2].fx = Ifx_Read;
|
||
|
- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
|
||
|
- d->fxState[2].size = sizeof(V128);
|
||
|
- d->fxState[3].fx = Ifx_Write;
|
||
|
- d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
|
||
|
- d->fxState[3].size = sizeof(V128);
|
||
|
-
|
||
|
- stmt(IRStmt_Dirty(d));
|
||
|
-
|
||
|
- if (s390_vr_is_cs_set(m6)) {
|
||
|
- s390_cc_set(cc);
|
||
|
- }
|
||
|
-
|
||
|
+ s390_insn_assert("vstrc", m5 <= 2);
|
||
|
+ s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC);
|
||
|
return "vstrc";
|
||
|
}
|
||
|
|
||
|
|
||
|
commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Tue Mar 2 14:12:29 2021 +0100
|
||
|
|
||
|
Bug 434296 - s390x: Rework IR conversion of VFENE
|
||
|
|
||
|
So far the z/Architecture instruction "vector find element not
|
||
|
equal" (VFENE) is transformed to a loop. This can cause spurious
|
||
|
"conditional jump or move depends on uninitialised value(s)" messages by
|
||
|
memcheck. Re-implement without a loop.
|
||
|
|
||
|
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
|
||
|
index 49b6cd5dd..caec3108e 100644
|
||
|
--- a/VEX/priv/guest_s390_defs.h
|
||
|
+++ b/VEX/priv/guest_s390_defs.h
|
||
|
@@ -265,7 +265,6 @@ typedef enum {
|
||
|
S390_VEC_OP_INVALID = 0,
|
||
|
S390_VEC_OP_VPKS,
|
||
|
S390_VEC_OP_VPKLS,
|
||
|
- S390_VEC_OP_VFENE,
|
||
|
S390_VEC_OP_VISTR,
|
||
|
S390_VEC_OP_VCEQ,
|
||
|
S390_VEC_OP_VTM,
|
||
|
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
|
||
|
index 63d2e8ce5..2188ce5c1 100644
|
||
|
--- a/VEX/priv/guest_s390_helpers.c
|
||
|
+++ b/VEX/priv/guest_s390_helpers.c
|
||
|
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
{0x00, 0x00}, /* invalid */
|
||
|
[S390_VEC_OP_VPKS] = {0xe7, 0x97},
|
||
|
[S390_VEC_OP_VPKLS] = {0xe7, 0x95},
|
||
|
- [S390_VEC_OP_VFENE] = {0xe7, 0x81},
|
||
|
[S390_VEC_OP_VISTR] = {0xe7, 0x5c},
|
||
|
[S390_VEC_OP_VCEQ] = {0xe7, 0xf8},
|
||
|
[S390_VEC_OP_VTM] = {0xe7, 0xd8},
|
||
|
@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
|
||
|
case S390_VEC_OP_VPKS:
|
||
|
case S390_VEC_OP_VPKLS:
|
||
|
- case S390_VEC_OP_VFENE:
|
||
|
case S390_VEC_OP_VCEQ:
|
||
|
case S390_VEC_OP_VGFM:
|
||
|
case S390_VEC_OP_VCH:
|
||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||
|
index 26a947813..c8dc3ec18 100644
|
||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||
|
@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
static const HChar *
|
||
|
s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
{
|
||
|
- const Bool negateComparison = True;
|
||
|
- const IRType type = s390_vr_get_type(m4);
|
||
|
+ s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3));
|
||
|
|
||
|
- /* Check for specification exception */
|
||
|
- vassert(m4 < 3);
|
||
|
- vassert((m5 & 0b1100) == 0);
|
||
|
-
|
||
|
- static const IROp elementGetters[] = {
|
||
|
- Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
|
||
|
+ static const IROp compare_op[3] = {
|
||
|
+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
|
||
|
};
|
||
|
- IROp getter = elementGetters[m4];
|
||
|
-
|
||
|
- static const IROp elementComparators[] = {
|
||
|
- Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
|
||
|
+ static const IROp abs_op[3] = {
|
||
|
+ Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4
|
||
|
};
|
||
|
- IROp comparator = elementComparators[m4];
|
||
|
-
|
||
|
- static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
|
||
|
- IROp converter = resultConverter[m4];
|
||
|
-
|
||
|
- IRTemp isZeroElem;
|
||
|
-
|
||
|
- IRTemp counter = newTemp(Ity_I64);
|
||
|
- assign(counter, get_counter_dw0());
|
||
|
-
|
||
|
- IRTemp arg1 = newTemp(type);
|
||
|
- assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
|
||
|
- IRTemp arg2 = newTemp(type);
|
||
|
- assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
|
||
|
+ IRTemp op2 = newTemp(Ity_V128);
|
||
|
+ IRTemp op3 = newTemp(Ity_V128);
|
||
|
+ IRTemp op2zero = newTemp(Ity_V128);
|
||
|
+ IRTemp diff = newTemp(Ity_V128);
|
||
|
+ IRTemp diff0 = newTemp(Ity_I64);
|
||
|
+ IRTemp neq0 = newTemp(Ity_I1);
|
||
|
+ IRTemp samebits = newTemp(Ity_I64);
|
||
|
+ IRExpr* tmp;
|
||
|
|
||
|
- IRTemp isGoodPair = newTemp(Ity_I1);
|
||
|
- if(negateComparison) {
|
||
|
- assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
|
||
|
- mkexpr(arg2))));
|
||
|
- } else {
|
||
|
- assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
|
||
|
- }
|
||
|
+ assign(op2, get_vr_qw(v2));
|
||
|
+ assign(op3, get_vr_qw(v3));
|
||
|
|
||
|
- if(s390_vr_is_zs_set(m5)) {
|
||
|
- isZeroElem = newTemp(Ity_I1);
|
||
|
- assign(isZeroElem, binop(comparator, mkexpr(arg1),
|
||
|
- unop(converter, mkU64(0))));
|
||
|
+ tmp = mkV128(0);
|
||
|
+ if (s390_vr_is_zs_set(m5)) {
|
||
|
+ tmp = binop(compare_op[m4], mkexpr(op2), tmp);
|
||
|
+ if (s390_vr_is_cs_set(m5) && v3 != v2) {
|
||
|
+ /* Count leading equal bits in the terminating element too */
|
||
|
+ tmp = unop(abs_op[m4], tmp);
|
||
|
+ }
|
||
|
+ assign(op2zero, tmp);
|
||
|
+ tmp = mkexpr(op2zero);
|
||
|
}
|
||
|
-
|
||
|
- static const UChar invalidIndices[] = {16, 8, 4};
|
||
|
- const UChar invalidIndex = invalidIndices[m4];
|
||
|
- IRTemp endOfVectorIsReached = newTemp(Ity_I1);
|
||
|
- assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
|
||
|
- mkU64(invalidIndex)));
|
||
|
-
|
||
|
- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
|
||
|
- IRExpr* shouldBreak = binop(Iop_Or32,
|
||
|
- unop(Iop_1Uto32, mkexpr(isGoodPair)),
|
||
|
- unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
|
||
|
- );
|
||
|
- if(s390_vr_is_zs_set(m5)) {
|
||
|
- shouldBreak = binop(Iop_Or32,
|
||
|
- shouldBreak,
|
||
|
- unop(Iop_1Uto32, mkexpr(isZeroElem)));
|
||
|
- }
|
||
|
- iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
|
||
|
-
|
||
|
- IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
|
||
|
- if(m4 > 0) {
|
||
|
- /* We should return index of byte but we found index of element in
|
||
|
- general case.
|
||
|
- if byte elem (m4 == 0) then indexOfByte = indexOfElement
|
||
|
- if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement
|
||
|
- = indexOfElement << 1
|
||
|
- if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement
|
||
|
- = indexOfElement << 2
|
||
|
- */
|
||
|
- foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4));
|
||
|
+ if (v3 != v2) {
|
||
|
+ tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3));
|
||
|
+ if (s390_vr_is_zs_set(m5))
|
||
|
+ tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero));
|
||
|
}
|
||
|
|
||
|
- IRTemp result = newTemp(Ity_I64);
|
||
|
- assign(result, mkite(mkexpr(endOfVectorIsReached),
|
||
|
- mkU64(16),
|
||
|
- foundIndex));
|
||
|
- put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
|
||
|
+ assign(diff, tmp);
|
||
|
+ assign(diff0, unop(Iop_V128HIto64, mkexpr(diff)));
|
||
|
+ assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0)));
|
||
|
+ assign(samebits, unop(Iop_ClzNat64,
|
||
|
+ mkite(mkexpr(neq0), mkexpr(diff0),
|
||
|
+ unop(Iop_V128to64, mkexpr(diff)))));
|
||
|
|
||
|
+ /* Determine the byte size of the initial equal-elements sequence */
|
||
|
+ tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3));
|
||
|
+ if (m4 != 0)
|
||
|
+ tmp = binop(Iop_Shl64, tmp, mkU8(m4));
|
||
|
+ tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8)));
|
||
|
+ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
|
||
|
|
||
|
if (s390_vr_is_cs_set(m5)) {
|
||
|
- static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64};
|
||
|
- IROp to64Converter = to64Converters[m4];
|
||
|
-
|
||
|
- IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U,
|
||
|
- unop(to64Converter, mkexpr(arg1)),
|
||
|
- unop(to64Converter, mkexpr(arg2)));
|
||
|
-
|
||
|
- IRExpr* ccexp = mkite(binop(Iop_CmpEQ32,
|
||
|
- unop(Iop_1Uto32, mkexpr(isGoodPair)),
|
||
|
- mkU32(1)),
|
||
|
- mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)),
|
||
|
- mkU64(3));
|
||
|
-
|
||
|
- if(s390_vr_is_zs_set(m5)) {
|
||
|
- IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2),
|
||
|
- unop(converter, mkU64(0)));
|
||
|
- IRExpr* bothArgsAreZero = binop(Iop_And32,
|
||
|
- unop(Iop_1Uto32, mkexpr(isZeroElem)),
|
||
|
- unop(Iop_1Uto32, arg2IsZero));
|
||
|
- ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)),
|
||
|
- mkU64(0),
|
||
|
- ccexp);
|
||
|
- }
|
||
|
+ /* Set condition code like follows --
|
||
|
+ 0: operands equal up to and including zero element
|
||
|
+ 1: op2 < op3 2: op2 > op3 3: op2 = op3 */
|
||
|
IRTemp cc = newTemp(Ity_I64);
|
||
|
- assign(cc, ccexp);
|
||
|
-
|
||
|
+ if (v3 == v2) {
|
||
|
+ tmp = mkU64(0);
|
||
|
+ } else {
|
||
|
+ IRTemp shift = newTemp(Ity_I8);
|
||
|
+ IRExpr* op2half = mkite(mkexpr(neq0),
|
||
|
+ unop(Iop_V128HIto64, mkexpr(op2)),
|
||
|
+ unop(Iop_V128to64, mkexpr(op2)));
|
||
|
+ IRExpr* op3half = mkite(mkexpr(neq0),
|
||
|
+ unop(Iop_V128HIto64, mkexpr(op3)),
|
||
|
+ unop(Iop_V128to64, mkexpr(op3)));
|
||
|
+ assign(shift, unop(Iop_64to8,
|
||
|
+ binop(Iop_Sub64, mkU64(63), mkexpr(samebits))));
|
||
|
+ tmp = binop(Iop_Or64,
|
||
|
+ binop(Iop_Shl64,
|
||
|
+ binop(Iop_And64, mkU64(1),
|
||
|
+ binop(Iop_Shr64, op2half, mkexpr(shift))),
|
||
|
+ mkU8(1)),
|
||
|
+ binop(Iop_And64, mkU64(1),
|
||
|
+ binop(Iop_Shr64, op3half, mkexpr(shift))));
|
||
|
+ }
|
||
|
+ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)),
|
||
|
+ mkU64(3), tmp));
|
||
|
s390_cc_set(cc);
|
||
|
}
|
||
|
-
|
||
|
-
|
||
|
- put_counter_dw0(mkU64(0));
|
||
|
+ dis_res->hint = Dis_HintVerbose;
|
||
|
return "vfene";
|
||
|
}
|
||
|
|
||
|
|
||
|
commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Tue Apr 27 20:13:26 2021 +0200
|
||
|
|
||
|
Bug 434296 - s390x: Rework IR conversion of VISTR
|
||
|
|
||
|
The z/Architecture instruction VISTR is currently transformed to a dirty
|
||
|
helper that executes the instruction. This can cause false positives with
|
||
|
memcheck if the input string contains undefined characters after the
|
||
|
string terminator. Implement without a dirty helper and emulate the
|
||
|
instruction instead.
|
||
|
|
||
|
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
|
||
|
index caec3108e..24f3798c1 100644
|
||
|
--- a/VEX/priv/guest_s390_defs.h
|
||
|
+++ b/VEX/priv/guest_s390_defs.h
|
||
|
@@ -265,7 +265,6 @@ typedef enum {
|
||
|
S390_VEC_OP_INVALID = 0,
|
||
|
S390_VEC_OP_VPKS,
|
||
|
S390_VEC_OP_VPKLS,
|
||
|
- S390_VEC_OP_VISTR,
|
||
|
S390_VEC_OP_VCEQ,
|
||
|
S390_VEC_OP_VTM,
|
||
|
S390_VEC_OP_VGFM,
|
||
|
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
|
||
|
index 2188ce5c1..1e04f601a 100644
|
||
|
--- a/VEX/priv/guest_s390_helpers.c
|
||
|
+++ b/VEX/priv/guest_s390_helpers.c
|
||
|
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
{0x00, 0x00}, /* invalid */
|
||
|
[S390_VEC_OP_VPKS] = {0xe7, 0x97},
|
||
|
[S390_VEC_OP_VPKLS] = {0xe7, 0x95},
|
||
|
- [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
|
||
|
[S390_VEC_OP_VCEQ] = {0xe7, 0xf8},
|
||
|
[S390_VEC_OP_VTM] = {0xe7, 0xd8},
|
||
|
[S390_VEC_OP_VGFM] = {0xe7, 0xb4},
|
||
|
@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
|
||
|
the_insn.VRR.op2 = opcodes[d->op][1];
|
||
|
|
||
|
switch(d->op) {
|
||
|
- case S390_VEC_OP_VISTR:
|
||
|
- the_insn.VRR.v1 = 1;
|
||
|
- the_insn.VRR.v2 = 2;
|
||
|
- the_insn.VRR.rxb = 0b1100;
|
||
|
- the_insn.VRR.m4 = d->m4;
|
||
|
- the_insn.VRR.m5 = d->m5;
|
||
|
- break;
|
||
|
-
|
||
|
case S390_VEC_OP_VTM:
|
||
|
the_insn.VRR.v1 = 2;
|
||
|
the_insn.VRR.v2 = 3;
|
||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||
|
index c8dc3ec18..dfea54259 100644
|
||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||
|
@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
|
||
|
static const HChar *
|
||
|
s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
|
||
|
{
|
||
|
- IRDirty* d;
|
||
|
- IRTemp cc = newTemp(Ity_I64);
|
||
|
-
|
||
|
- /* Check for specification exception */
|
||
|
- vassert(m3 < 3);
|
||
|
- vassert((m5 & 0b1110) == 0);
|
||
|
+ s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1));
|
||
|
|
||
|
- s390x_vec_op_details_t details = { .serialized = 0ULL };
|
||
|
- details.op = S390_VEC_OP_VISTR;
|
||
|
- details.v1 = v1;
|
||
|
- details.v2 = v2;
|
||
|
- details.m4 = m3;
|
||
|
- details.m5 = m5;
|
||
|
-
|
||
|
- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
|
||
|
- &s390x_dirtyhelper_vec_op,
|
||
|
- mkIRExprVec_2(IRExpr_GSPTR(),
|
||
|
- mkU64(details.serialized)));
|
||
|
+ static const IROp compare_op[3] = {
|
||
|
+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
|
||
|
+ };
|
||
|
+ IRExpr* t;
|
||
|
+ IRTemp op2 = newTemp(Ity_V128);
|
||
|
+ IRTemp op2term = newTemp(Ity_V128);
|
||
|
+ IRTemp mask = newTemp(Ity_V128);
|
||
|
|
||
|
- d->nFxState = 2;
|
||
|
- vex_bzero(&d->fxState, sizeof(d->fxState));
|
||
|
- d->fxState[0].fx = Ifx_Read;
|
||
|
- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
|
||
|
- d->fxState[0].size = sizeof(V128);
|
||
|
- d->fxState[1].fx = Ifx_Write;
|
||
|
- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
|
||
|
- d->fxState[1].size = sizeof(V128);
|
||
|
+ assign(op2, get_vr_qw(v2));
|
||
|
+ assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0)));
|
||
|
+ t = mkexpr(op2term);
|
||
|
|
||
|
- stmt(IRStmt_Dirty(d));
|
||
|
+ for (UChar i = m3; i < 4; i++) {
|
||
|
+ IRTemp s = newTemp(Ity_V128);
|
||
|
+ assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i))));
|
||
|
+ t = mkexpr(s);
|
||
|
+ }
|
||
|
+ assign(mask, unop(Iop_NotV128, t));
|
||
|
+ put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask)));
|
||
|
|
||
|
if (s390_vr_is_cs_set(m5)) {
|
||
|
+ IRTemp cc = newTemp(Ity_I64);
|
||
|
+ assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask))));
|
||
|
s390_cc_set(cc);
|
||
|
}
|
||
|
-
|
||
|
+ dis_res->hint = Dis_HintVerbose;
|
||
|
return "vistr";
|
||
|
}
|
||
|
|
||
|
|
||
|
commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Fri Apr 16 12:44:44 2021 +0200
|
||
|
|
||
|
Bug 434296 - s390x: Add memcheck test cases for vector string insns
|
||
|
|
||
|
Bug 434296 addresses memcheck false positives with the vector string
|
||
|
instructions VISTR, VSTRC, VFAE, VFEE, and VFENE. Add test cases that
|
||
|
verify the fix for that bug. Without the fix, memcheck yields many
|
||
|
complains with these tests, most of which are false positives.
|
||
|
|
||
|
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
|
||
|
index e4e69eb38..d183841ef 100644
|
||
|
--- a/memcheck/tests/s390x/Makefile.am
|
||
|
+++ b/memcheck/tests/s390x/Makefile.am
|
||
|
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
|
||
|
|
||
|
dist_noinst_SCRIPTS = filter_stderr
|
||
|
|
||
|
-INSN_TESTS = cdsg cu21 cu42 ltgjhe
|
||
|
+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
|
||
|
|
||
|
check_PROGRAMS = $(INSN_TESTS)
|
||
|
|
||
|
@@ -14,3 +14,7 @@ EXTRA_DIST = \
|
||
|
AM_CFLAGS += @FLAG_M64@
|
||
|
AM_CXXFLAGS += @FLAG_M64@
|
||
|
AM_CCASFLAGS += @FLAG_M64@
|
||
|
+
|
||
|
+vstrc_CFLAGS = $(AM_CFLAGS) -march=z13
|
||
|
+vfae_CFLAGS = $(AM_CFLAGS) -march=z13
|
||
|
+vistr_CFLAGS = $(AM_CFLAGS) -march=z13
|
||
|
diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c
|
||
|
new file mode 100644
|
||
|
index 000000000..68781e7fb
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vfae.c
|
||
|
@@ -0,0 +1,72 @@
|
||
|
+#include <stdio.h>
|
||
|
+#include <string.h>
|
||
|
+
|
||
|
+#define VECTOR __attribute__ ((vector_size (16)))
|
||
|
+
|
||
|
+typedef char VECTOR char_v;
|
||
|
+
|
||
|
+volatile char tmp;
|
||
|
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
|
||
|
+
|
||
|
+static char_v to_char_vec(const char *str)
|
||
|
+{
|
||
|
+ char_v v;
|
||
|
+ char buf[17];
|
||
|
+ int len = strlen(str);
|
||
|
+
|
||
|
+ memcpy(buf, str, (len && str[len - 1] == '~') ? len - 1 : len + 1);
|
||
|
+ v = *(char_v *) buf;
|
||
|
+ return v;
|
||
|
+}
|
||
|
+
|
||
|
+#define GENERATE_TEST(mnem) \
|
||
|
+static void test_ ## mnem ## _char(const char *str, const char *match, \
|
||
|
+ int expect_res, int expect_cc) \
|
||
|
+{ \
|
||
|
+ int cc; \
|
||
|
+ char_v v1; \
|
||
|
+ char_v v2 = to_char_vec(str); \
|
||
|
+ char_v v3 = to_char_vec(match); \
|
||
|
+ \
|
||
|
+ __asm__( \
|
||
|
+ "cr 0,0\n\t" /* Clear CC */ \
|
||
|
+ #mnem " %[v1],%[v2],%[v3],0,3\n\t" \
|
||
|
+ "ipm %[cc]\n\t" \
|
||
|
+ "srl %[cc],28" \
|
||
|
+ : [v1] "=v" (v1), \
|
||
|
+ [cc] "=d" (cc) \
|
||
|
+ : [v2] "v" (v2), \
|
||
|
+ [v3] "v" (v3) \
|
||
|
+ : "cc"); \
|
||
|
+ \
|
||
|
+ tmp = hex_digit[v1[7] & 0x1f]; \
|
||
|
+ if (expect_res >= 0 && v1[7] != expect_res) \
|
||
|
+ printf("result %u != %d\n", v1[7], expect_res); \
|
||
|
+ \
|
||
|
+ tmp = hex_digit[cc & 0xf]; \
|
||
|
+ if (expect_cc >= 0 && cc != expect_cc) \
|
||
|
+ printf("CC %d != %d\n", cc, expect_cc); \
|
||
|
+}
|
||
|
+
|
||
|
+GENERATE_TEST(vfae)
|
||
|
+
|
||
|
+GENERATE_TEST(vfee)
|
||
|
+
|
||
|
+GENERATE_TEST(vfene)
|
||
|
+
|
||
|
+int main()
|
||
|
+{
|
||
|
+ test_vfae_char("not found", "................", 9, 0);
|
||
|
+ test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2);
|
||
|
+ test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1);
|
||
|
+
|
||
|
+ test_vfee_char("same char here", "..........here", 10, 2);
|
||
|
+ test_vfee_char("and here too ...", "_________t~", 9, 1);
|
||
|
+ test_vfee_char("equality!~", "========!!~", 8, -1);
|
||
|
+
|
||
|
+ test_vfene_char("strings equal", "strings equal", 13, 0);
|
||
|
+ test_vfene_char(hex_digit, hex_digit, 16, 3);
|
||
|
+ test_vfene_char("undef~", "undefined", -1, -1);
|
||
|
+ test_vfene_char("active~", "actually ok", 3, 1);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..8aad3c87f
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vfae.stderr.exp
|
||
|
@@ -0,0 +1,20 @@
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vfae_char (vfae.c:51)
|
||
|
+ by 0x........: main (vfae.c:61)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vfae_char (vfae.c:51)
|
||
|
+ by 0x........: main (vfae.c:61)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vfee_char (vfae.c:53)
|
||
|
+ by 0x........: main (vfae.c:65)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vfene_char (vfae.c:55)
|
||
|
+ by 0x........: main (vfae.c:69)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vfene_char (vfae.c:55)
|
||
|
+ by 0x........: main (vfae.c:69)
|
||
|
+
|
||
|
diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..e69de29bb
|
||
|
diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest
|
||
|
new file mode 100644
|
||
|
index 000000000..ae36c22fe
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vfae.vgtest
|
||
|
@@ -0,0 +1,2 @@
|
||
|
+prog: vfae
|
||
|
+vgopts: -q
|
||
|
diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c
|
||
|
new file mode 100644
|
||
|
index 000000000..7ed59b94b
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vistr.c
|
||
|
@@ -0,0 +1,76 @@
|
||
|
+#include <stdio.h>
|
||
|
+#include <string.h>
|
||
|
+
|
||
|
+#define VECTOR __attribute__ ((vector_size (16)))
|
||
|
+
|
||
|
+typedef char VECTOR char_v;
|
||
|
+
|
||
|
+volatile char tmp;
|
||
|
+static const char *hex_digit = "0123456789abcdef";
|
||
|
+
|
||
|
+static char_v to_char_vec(const char *str, char_v *maskp)
|
||
|
+{
|
||
|
+ char buf[17];
|
||
|
+ char_v v;
|
||
|
+ char_v mask = {0};
|
||
|
+
|
||
|
+ for (int i = 0; i < sizeof(buf); i++) {
|
||
|
+ char ch = str[i];
|
||
|
+ if (ch == '\0')
|
||
|
+ break;
|
||
|
+ else if (ch == '$') {
|
||
|
+ buf[i] = '\0';
|
||
|
+ mask[i] = -1;
|
||
|
+ } else if (ch != '~') {
|
||
|
+ buf[i] = ch;
|
||
|
+ mask[i] = -1;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ v = *(char_v *) buf;
|
||
|
+ *maskp = mask;
|
||
|
+ return v;
|
||
|
+}
|
||
|
+
|
||
|
+static void test_vistr_char(const char *str, const char *expect_res,
|
||
|
+ int expect_cc)
|
||
|
+{
|
||
|
+ int cc, count;
|
||
|
+ char_v v1, mask;
|
||
|
+ char_v v2 = to_char_vec(str, &mask);
|
||
|
+ char_v exp_v1 = to_char_vec(expect_res, &mask);
|
||
|
+ char equal[16];
|
||
|
+
|
||
|
+ __asm__(
|
||
|
+ "cr 0,0\n\t" /* Clear CC */
|
||
|
+ "vistr %[v1],%[v2],0,1\n\t"
|
||
|
+ "ipm %[cc]\n\t"
|
||
|
+ "srl %[cc],28"
|
||
|
+ : [v1] "=v" (v1),
|
||
|
+ [cc] "=d" (cc)
|
||
|
+ : [v2] "v" (v2)
|
||
|
+ : "cc");
|
||
|
+
|
||
|
+ *(char_v *) equal = (v1 & mask) == (exp_v1 & mask);
|
||
|
+ if (memchr(equal, 0, sizeof(equal)))
|
||
|
+ printf("Result doesn't match `%s'\n", expect_res);
|
||
|
+
|
||
|
+ count = 0;
|
||
|
+ for (int i = 0; i < 16; i++) {
|
||
|
+ if (v1[i] == 0) count++;
|
||
|
+ }
|
||
|
+ tmp = hex_digit[count];
|
||
|
+
|
||
|
+ tmp = hex_digit[cc & 0xf];
|
||
|
+ if (expect_cc >= 0 && cc != expect_cc)
|
||
|
+ printf("CC %d != %d\n", cc, expect_cc);
|
||
|
+}
|
||
|
+
|
||
|
+int main()
|
||
|
+{
|
||
|
+ test_vistr_char("terminated$====~", "terminated$$$$$$", 0);
|
||
|
+ test_vistr_char("undef~~~~~~~~~~~", "undef", -1);
|
||
|
+ test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1);
|
||
|
+ test_vistr_char("Not. Terminated.", "Not. Terminated.", 3);
|
||
|
+ test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0);
|
||
|
+ return 0;
|
||
|
+}
|
||
|
diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..e4f35fd74
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vistr.stderr.exp
|
||
|
@@ -0,0 +1,20 @@
|
||
|
+Conditional jump or move depends on uninitialised value(s)
|
||
|
+ at 0x........: test_vistr_char (vistr.c:59)
|
||
|
+ by 0x........: main (vistr.c:71)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vistr_char (vistr.c:63)
|
||
|
+ by 0x........: main (vistr.c:71)
|
||
|
+
|
||
|
+Conditional jump or move depends on uninitialised value(s)
|
||
|
+ at 0x........: test_vistr_char (vistr.c:59)
|
||
|
+ by 0x........: main (vistr.c:72)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vistr_char (vistr.c:63)
|
||
|
+ by 0x........: main (vistr.c:72)
|
||
|
+
|
||
|
+Conditional jump or move depends on uninitialised value(s)
|
||
|
+ at 0x........: test_vistr_char (vistr.c:59)
|
||
|
+ by 0x........: main (vistr.c:74)
|
||
|
+
|
||
|
diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest
|
||
|
new file mode 100644
|
||
|
index 000000000..f99749d85
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vistr.vgtest
|
||
|
@@ -0,0 +1,2 @@
|
||
|
+prog: vistr
|
||
|
+vgopts: -q
|
||
|
diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c
|
||
|
new file mode 100644
|
||
|
index 000000000..268e2f858
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vstrc.c
|
||
|
@@ -0,0 +1,92 @@
|
||
|
+#include <stdio.h>
|
||
|
+#include <string.h>
|
||
|
+
|
||
|
+#define VECTOR __attribute__ ((vector_size (16)))
|
||
|
+
|
||
|
+typedef char VECTOR char_v;
|
||
|
+
|
||
|
+struct vstrc_char_rng {
|
||
|
+ unsigned char range[16];
|
||
|
+ unsigned char flags[16];
|
||
|
+};
|
||
|
+
|
||
|
+#define RNG_FLAG_EQ 0x80
|
||
|
+#define RNG_FLAG_LT 0x40
|
||
|
+#define RNG_FLAG_GT 0x20
|
||
|
+#define RNG_FLAG_ANY 0xe0
|
||
|
+#define RNG_FLAG_NONE 0x00
|
||
|
+
|
||
|
+volatile char tmp;
|
||
|
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
|
||
|
+
|
||
|
+static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng,
|
||
|
+ int expect_res, int expect_cc)
|
||
|
+{
|
||
|
+ int cc;
|
||
|
+ char_v v1;
|
||
|
+ char_v v2 = *(const char_v *) str;
|
||
|
+ char_v v3 = *(const char_v *) rng->range;
|
||
|
+ char_v v4 = *(const char_v *) rng->flags;
|
||
|
+
|
||
|
+ __asm__(
|
||
|
+ "cr 0,0\n\t" /* Clear CC */
|
||
|
+ "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t"
|
||
|
+ "ipm %[cc]\n\t"
|
||
|
+ "srl %[cc],28"
|
||
|
+ : [v1] "=v" (v1),
|
||
|
+ [cc] "=d" (cc)
|
||
|
+ : [v2] "v" (v2),
|
||
|
+ [v3] "v" (v3),
|
||
|
+ [v4] "v" (v4)
|
||
|
+ : "cc");
|
||
|
+
|
||
|
+ tmp = hex_digit[v1[7] & 0x1f];
|
||
|
+ if (expect_res >= 0 && v1[7] != expect_res)
|
||
|
+ printf("result %u != %d\n", v1[7], expect_res);
|
||
|
+
|
||
|
+ tmp = hex_digit[cc & 0xf];
|
||
|
+ if (expect_cc >= 0 && cc != expect_cc)
|
||
|
+ printf("CC %d != %d\n", cc, expect_cc);
|
||
|
+}
|
||
|
+
|
||
|
+int main()
|
||
|
+{
|
||
|
+ struct vstrc_char_rng rng;
|
||
|
+ char buf[16];
|
||
|
+
|
||
|
+ memset(rng.flags, RNG_FLAG_NONE, 16);
|
||
|
+
|
||
|
+ rng.range[4] = 'z';
|
||
|
+ rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ;
|
||
|
+ rng.flags[5] = RNG_FLAG_ANY;
|
||
|
+ /* OK: match at the 'z' */
|
||
|
+ test_vstrc_char("find the z", &rng, 9, 2);
|
||
|
+
|
||
|
+ rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ;
|
||
|
+ rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ;
|
||
|
+ /* Bad: undefined range */
|
||
|
+ test_vstrc_char("undefined", &rng, -1, -1);
|
||
|
+
|
||
|
+ rng.range[12] = 'a';
|
||
|
+ rng.range[13] = 'c';
|
||
|
+ /* OK: match at the 'a' */
|
||
|
+ test_vstrc_char("get the abc", &rng, 8, 2);
|
||
|
+
|
||
|
+ rng.flags[12] = RNG_FLAG_LT;
|
||
|
+ rng.flags[13] = RNG_FLAG_GT;
|
||
|
+ /* OK: no match up to null terminator */
|
||
|
+ test_vstrc_char("no match", &rng, 8, 0);
|
||
|
+
|
||
|
+ /* OK: no match, no null terminator */
|
||
|
+ test_vstrc_char("0123456789abcdef", &rng, 16, 3);
|
||
|
+
|
||
|
+ buf[0] = 'x';
|
||
|
+ /* Bad: undefined string */
|
||
|
+ test_vstrc_char(buf, &rng, -1, -1);
|
||
|
+
|
||
|
+ buf[1] = 'z';
|
||
|
+ /* Bad: valid match, but CC undefined */
|
||
|
+ test_vstrc_char(buf, &rng, 1, -1);
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..c1125bea1
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vstrc.stderr.exp
|
||
|
@@ -0,0 +1,20 @@
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vstrc_char (vstrc.c:43)
|
||
|
+ by 0x........: main (vstrc.c:68)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vstrc_char (vstrc.c:47)
|
||
|
+ by 0x........: main (vstrc.c:68)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vstrc_char (vstrc.c:43)
|
||
|
+ by 0x........: main (vstrc.c:85)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vstrc_char (vstrc.c:47)
|
||
|
+ by 0x........: main (vstrc.c:85)
|
||
|
+
|
||
|
+Use of uninitialised value of size 8
|
||
|
+ at 0x........: test_vstrc_char (vstrc.c:47)
|
||
|
+ by 0x........: main (vstrc.c:89)
|
||
|
+
|
||
|
diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..e69de29bb
|
||
|
diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest
|
||
|
new file mode 100644
|
||
|
index 000000000..26f5db99b
|
||
|
--- /dev/null
|
||
|
+++ b/memcheck/tests/s390x/vstrc.vgtest
|
||
|
@@ -0,0 +1,2 @@
|
||
|
+prog: vstrc
|
||
|
+vgopts: -q
|
||
|
|
||
|
commit a0bb049ace14ab52d386bb1d49a399f39eec4986
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Tue Mar 23 14:55:09 2021 +0100
|
||
|
|
||
|
s390x: Improve handling of amodes without base register
|
||
|
|
||
|
Addressing modes without a base or index register represent constants.
|
||
|
They can occur in some special cases such as shift operations and when
|
||
|
accessing individual vector elements. Perform some minor improvements to
|
||
|
the handling of such amodes.
|
||
|
|
||
|
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
|
||
|
index 6e0734ae0..2587f81a1 100644
|
||
|
--- a/VEX/priv/host_s390_defs.c
|
||
|
+++ b/VEX/priv/host_s390_defs.c
|
||
|
@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am)
|
||
|
{
|
||
|
switch (am->tag) {
|
||
|
case S390_AMODE_B12:
|
||
|
- return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
|
||
|
+ return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) &&
|
||
|
+ fits_unsigned_12bit(am->d);
|
||
|
|
||
|
case S390_AMODE_B20:
|
||
|
return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
|
||
|
@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+static Bool
|
||
|
+s390_amode_is_constant(const s390_amode *am)
|
||
|
+{
|
||
|
+ return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0));
|
||
|
+}
|
||
|
+
|
||
|
|
||
|
/* Record the register use of an amode */
|
||
|
static void
|
||
|
s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
|
||
|
{
|
||
|
- switch (am->tag) {
|
||
|
- case S390_AMODE_B12:
|
||
|
- case S390_AMODE_B20:
|
||
|
- addHRegUse(u, HRmRead, am->b);
|
||
|
- return;
|
||
|
-
|
||
|
- case S390_AMODE_BX12:
|
||
|
- case S390_AMODE_BX20:
|
||
|
+ if (!sameHReg(am->b, s390_hreg_gpr(0)))
|
||
|
addHRegUse(u, HRmRead, am->b);
|
||
|
+ if (!sameHReg(am->x, s390_hreg_gpr(0)))
|
||
|
addHRegUse(u, HRmRead, am->x);
|
||
|
- return;
|
||
|
-
|
||
|
- default:
|
||
|
- vpanic("s390_amode_get_reg_usage");
|
||
|
- }
|
||
|
}
|
||
|
|
||
|
|
||
|
static void
|
||
|
s390_amode_map_regs(HRegRemap *m, s390_amode *am)
|
||
|
{
|
||
|
- switch (am->tag) {
|
||
|
- case S390_AMODE_B12:
|
||
|
- case S390_AMODE_B20:
|
||
|
- am->b = lookupHRegRemap(m, am->b);
|
||
|
- return;
|
||
|
-
|
||
|
- case S390_AMODE_BX12:
|
||
|
- case S390_AMODE_BX20:
|
||
|
+ if (!sameHReg(am->b, s390_hreg_gpr(0)))
|
||
|
am->b = lookupHRegRemap(m, am->b);
|
||
|
+ if (!sameHReg(am->x, s390_hreg_gpr(0)))
|
||
|
am->x = lookupHRegRemap(m, am->x);
|
||
|
- return;
|
||
|
-
|
||
|
- default:
|
||
|
- vpanic("s390_amode_map_regs");
|
||
|
- }
|
||
|
}
|
||
|
|
||
|
|
||
|
@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
|
||
|
insn->variant.alu.dst, vreg_opnd);
|
||
|
}
|
||
|
|
||
|
+ /* v-vgetelem <reg>,<vreg> */
|
||
|
+ if (insn->tag == S390_INSN_VEC_AMODEOP
|
||
|
+ && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM
|
||
|
+ && insn->size == 8
|
||
|
+ && sameHReg(insn->variant.vec_amodeop.op1, vreg)
|
||
|
+ && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) {
|
||
|
+ vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d;
|
||
|
+ return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am);
|
||
|
+ }
|
||
|
+
|
||
|
/* v-<unop> <reg>,<vreg> */
|
||
|
if (insn->tag == S390_INSN_UNOP
|
||
|
&& insn->variant.unop.src.tag == S390_OPND_REG
|
||
|
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
|
||
|
index 5f79280c0..ceca6836e 100644
|
||
|
--- a/VEX/priv/host_s390_isel.c
|
||
|
+++ b/VEX/priv/host_s390_isel.c
|
||
|
@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr,
|
||
|
Bool no_index __attribute__((unused)),
|
||
|
Bool short_displacement)
|
||
|
{
|
||
|
- if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
|
||
|
+ if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 &&
|
||
|
+ expr->Iex.Unop.arg->tag == Iex_Const) {
|
||
|
+ UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8;
|
||
|
+ return s390_amode_b12((Int)value, s390_hreg_gpr(0));
|
||
|
+
|
||
|
+ } else if (expr->tag == Iex_Const) {
|
||
|
+ ULong value = expr->Iex.Const.con->Ico.U64;
|
||
|
+ if (ulong_fits_unsigned_12bit(value)) {
|
||
|
+ return s390_amode_b12((Int)value, s390_hreg_gpr(0));
|
||
|
+ }
|
||
|
+
|
||
|
+ } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
|
||
|
IRExpr *arg1 = expr->Iex.Binop.arg1;
|
||
|
IRExpr *arg2 = expr->Iex.Binop.arg2;
|
||
|
|
||
|
|
||
|
commit fd935e238d907d9c523a311ba795077d95ad6912
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Fri Mar 26 19:27:47 2021 +0100
|
||
|
|
||
|
s390x: Rework insn "v-vdup" and add "v-vrep"
|
||
|
|
||
|
So far the only s390x insn for filling a vector with copies of the same
|
||
|
element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first
|
||
|
element of its vector argument. This is fairly restrictive and can lead
|
||
|
to unnecessarily long code sequences.
|
||
|
|
||
|
Redefine "v-vdup" to replicate any scalar value instead. And add
|
||
|
"v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a
|
||
|
vector. Select the latter for suitable expressions like
|
||
|
|
||
|
Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i))
|
||
|
|
||
|
This improves the generated code for some vector string instructions,
|
||
|
where a lot of element replications are performed.
|
||
|
|
||
|
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
|
||
|
index 2587f81a1..c764d6ef9 100644
|
||
|
--- a/VEX/priv/host_s390_defs.c
|
||
|
+++ b/VEX/priv/host_s390_defs.c
|
||
|
@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
|
||
|
insn->variant.unop.dst, vreg_opnd);
|
||
|
}
|
||
|
|
||
|
+ /* v-vrep <reg>,<vreg>,<idx> */
|
||
|
+ if (insn->tag == S390_INSN_VEC_REPLICATE
|
||
|
+ && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
|
||
|
+ vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
|
||
|
+ return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
|
||
|
+ insn->variant.vec_replicate.dst, vreg_opnd);
|
||
|
+ }
|
||
|
+
|
||
|
no_match:
|
||
|
return NULL;
|
||
|
}
|
||
|
@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
|
||
|
addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
|
||
|
break;
|
||
|
|
||
|
+ case S390_INSN_VEC_REPLICATE:
|
||
|
+ addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
|
||
|
+ addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
|
||
|
+ break;
|
||
|
+
|
||
|
default:
|
||
|
vpanic("s390_insn_get_reg_usage");
|
||
|
}
|
||
|
@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
|
||
|
insn->variant.vec_triop.op3 =
|
||
|
lookupHRegRemap(m, insn->variant.vec_triop.op3);
|
||
|
break;
|
||
|
+
|
||
|
+ case S390_INSN_VEC_REPLICATE:
|
||
|
+ insn->variant.vec_replicate.dst =
|
||
|
+ lookupHRegRemap(m, insn->variant.vec_replicate.dst);
|
||
|
+ insn->variant.vec_replicate.op1 =
|
||
|
+ lookupHRegRemap(m, insn->variant.vec_replicate.op1);
|
||
|
+ break;
|
||
|
+
|
||
|
default:
|
||
|
vpanic("s390_insn_map_regs");
|
||
|
}
|
||
|
@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2)
|
||
|
|
||
|
|
||
|
static UChar *
|
||
|
-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
|
||
|
+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
|
||
|
+{
|
||
|
+ ULong the_insn = op;
|
||
|
+ ULong rxb = s390_update_rxb(0, 1, &v1);
|
||
|
+
|
||
|
+ the_insn |= ((ULong)v1) << 36;
|
||
|
+ the_insn |= ((ULong)i2) << 16;
|
||
|
+ the_insn |= ((ULong)m3) << 12;
|
||
|
+ the_insn |= ((ULong)rxb)<< 8;
|
||
|
+
|
||
|
+ return emit_6bytes(p, the_insn);
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
+static UChar *
|
||
|
+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
|
||
|
+{
|
||
|
+ ULong the_insn = op;
|
||
|
+ ULong rxb = s390_update_rxb(0, 1, &v1);
|
||
|
+ rxb = s390_update_rxb(rxb, 2, &v3);
|
||
|
+
|
||
|
+ the_insn |= ((ULong)v1) << 36;
|
||
|
+ the_insn |= ((ULong)v3) << 32;
|
||
|
+ the_insn |= ((ULong)i2) << 16;
|
||
|
+ the_insn |= ((ULong)m4) << 12;
|
||
|
+ the_insn |= ((ULong)rxb) << 8;
|
||
|
+
|
||
|
+ return emit_6bytes(p, the_insn);
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
+static UChar *
|
||
|
+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
|
||
|
{
|
||
|
ULong the_insn = op;
|
||
|
ULong rxb = s390_update_rxb(0, 1, &v1);
|
||
|
@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
|
||
|
the_insn |= ((ULong)x2) << 32;
|
||
|
the_insn |= ((ULong)b2) << 28;
|
||
|
the_insn |= ((ULong)d2) << 16;
|
||
|
+ the_insn |= ((ULong)m3) << 12;
|
||
|
the_insn |= ((ULong)rxb)<< 8;
|
||
|
|
||
|
return emit_6bytes(p, the_insn);
|
||
|
@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
|
||
|
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
|
||
|
|
||
|
- return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
|
||
|
+ return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
|
||
|
}
|
||
|
|
||
|
static UChar *
|
||
|
@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2)
|
||
|
}
|
||
|
|
||
|
|
||
|
+static UChar *
|
||
|
+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
|
||
|
+{
|
||
|
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
+ s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
|
||
|
+
|
||
|
+ return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
static UChar *
|
||
|
s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
|
||
|
{
|
||
|
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
|
||
|
|
||
|
- return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
|
||
|
+ return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0);
|
||
|
}
|
||
|
|
||
|
|
||
|
@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4)
|
||
|
|
||
|
|
||
|
static UChar *
|
||
|
-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
|
||
|
+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4)
|
||
|
{
|
||
|
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
- s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
|
||
|
+ s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4);
|
||
|
|
||
|
- return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
|
||
|
+ return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4);
|
||
|
}
|
||
|
|
||
|
|
||
|
+static UChar *
|
||
|
+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
|
||
|
+{
|
||
|
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
+ s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
|
||
|
+
|
||
|
+ return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
|
||
|
+}
|
||
|
+
|
||
|
|
||
|
static UChar *
|
||
|
s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
|
||
|
@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
|
||
|
return insn;
|
||
|
}
|
||
|
|
||
|
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
|
||
|
+ UChar idx)
|
||
|
+{
|
||
|
+ s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
|
||
|
+
|
||
|
+ insn->tag = S390_INSN_VEC_REPLICATE;
|
||
|
+ insn->size = size;
|
||
|
+ insn->variant.vec_replicate.dst = dst;
|
||
|
+ insn->variant.vec_replicate.op1 = op1;
|
||
|
+ insn->variant.vec_replicate.idx = idx;
|
||
|
+
|
||
|
+ return insn;
|
||
|
+}
|
||
|
+
|
||
|
/*---------------------------------------------------------------*/
|
||
|
/*--- Debug print ---*/
|
||
|
/*---------------------------------------------------------------*/
|
||
|
@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn)
|
||
|
insn->variant.vec_triop.op3);
|
||
|
break;
|
||
|
|
||
|
+ case S390_INSN_VEC_REPLICATE:
|
||
|
+ s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
|
||
|
+ insn->variant.vec_replicate.dst,
|
||
|
+ insn->variant.vec_replicate.op1,
|
||
|
+ insn->variant.vec_replicate.idx);
|
||
|
+ break;
|
||
|
+
|
||
|
default: goto fail;
|
||
|
}
|
||
|
|
||
|
@@ -9386,6 +9480,56 @@ s390_negate_emit(UChar *buf, const s390_insn *insn)
|
||
|
}
|
||
|
|
||
|
|
||
|
+static UChar *
|
||
|
+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
|
||
|
+{
|
||
|
+ UChar v1 = hregNumber(insn->variant.unop.dst);
|
||
|
+ s390_opnd_RMI opnd = insn->variant.unop.src;
|
||
|
+ UChar r2;
|
||
|
+
|
||
|
+ switch (opnd.tag) {
|
||
|
+ case S390_OPND_AMODE: {
|
||
|
+ s390_amode* am = opnd.variant.am;
|
||
|
+ UInt b = hregNumber(am->b);
|
||
|
+ UInt x = hregNumber(am->x);
|
||
|
+ UInt d = am->d;
|
||
|
+
|
||
|
+ if (fits_unsigned_12bit(d)) {
|
||
|
+ return s390_emit_VLREP(buf, v1, x, b, d,
|
||
|
+ s390_getM_from_size(insn->size));
|
||
|
+ }
|
||
|
+ buf = s390_emit_load_mem(buf, insn->size, R0, am);
|
||
|
+ r2 = R0;
|
||
|
+ goto duplicate_from_gpr;
|
||
|
+ }
|
||
|
+
|
||
|
+ case S390_OPND_IMMEDIATE: {
|
||
|
+ ULong val = opnd.variant.imm;
|
||
|
+
|
||
|
+ if (ulong_fits_signed_16bit(val)) {
|
||
|
+ return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
|
||
|
+ }
|
||
|
+ buf = s390_emit_load_64imm(buf, R0, val);
|
||
|
+ r2 = R0;
|
||
|
+ goto duplicate_from_gpr;
|
||
|
+ }
|
||
|
+
|
||
|
+ case S390_OPND_REG:
|
||
|
+ r2 = hregNumber(opnd.variant.reg);
|
||
|
+
|
||
|
+ duplicate_from_gpr:
|
||
|
+ buf = s390_emit_VLVGP(buf, v1, r2, r2);
|
||
|
+ if (insn->size != 8) {
|
||
|
+ buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1,
|
||
|
+ s390_getM_from_size(insn->size));
|
||
|
+ }
|
||
|
+ return buf;
|
||
|
+ }
|
||
|
+
|
||
|
+ vpanic("s390_vec_duplicate_emit");
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
static UChar *
|
||
|
s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
|
||
|
{
|
||
|
@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
|
||
|
UShort i2 = insn->variant.unop.src.variant.imm;
|
||
|
return s390_emit_VGBM(buf, v1, i2);
|
||
|
}
|
||
|
- case S390_VEC_DUPLICATE: {
|
||
|
- vassert(insn->variant.unop.src.tag == S390_OPND_REG);
|
||
|
- UChar v1 = hregNumber(insn->variant.unop.dst);
|
||
|
- UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
|
||
|
- return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
|
||
|
- }
|
||
|
+ case S390_VEC_DUPLICATE: return s390_vec_duplicate_emit(buf, insn);
|
||
|
case S390_VEC_UNPACKLOWS: {
|
||
|
vassert(insn->variant.unop.src.tag == S390_OPND_REG);
|
||
|
vassert(insn->size < 8);
|
||
|
@@ -11595,6 +11734,16 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
|
||
|
}
|
||
|
|
||
|
|
||
|
+static UChar *
|
||
|
+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
|
||
|
+{
|
||
|
+ UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
|
||
|
+ UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
|
||
|
+ UShort idx = (UShort) insn->variant.vec_replicate.idx;
|
||
|
+ return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
Int
|
||
|
emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
|
||
|
Bool mode64, VexEndness endness_host,
|
||
|
@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
|
||
|
case S390_INSN_VEC_TRIOP:
|
||
|
end = s390_insn_vec_triop_emit(buf, insn);
|
||
|
break;
|
||
|
+
|
||
|
+ case S390_INSN_VEC_REPLICATE:
|
||
|
+ end = s390_insn_vec_replicate_emit(buf, insn);
|
||
|
+ break;
|
||
|
+
|
||
|
fail:
|
||
|
default:
|
||
|
vpanic("emit_S390Instr");
|
||
|
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
|
||
|
index 9b69f4d38..063fd3800 100644
|
||
|
--- a/VEX/priv/host_s390_defs.h
|
||
|
+++ b/VEX/priv/host_s390_defs.h
|
||
|
@@ -166,7 +166,8 @@ typedef enum {
|
||
|
S390_INSN_VEC_AMODEINTOP,
|
||
|
S390_INSN_VEC_UNOP,
|
||
|
S390_INSN_VEC_BINOP,
|
||
|
- S390_INSN_VEC_TRIOP
|
||
|
+ S390_INSN_VEC_TRIOP,
|
||
|
+ S390_INSN_VEC_REPLICATE
|
||
|
} s390_insn_tag;
|
||
|
|
||
|
|
||
|
@@ -738,6 +739,11 @@ typedef struct {
|
||
|
HReg op2; /* 128-bit second operand */
|
||
|
HReg op3; /* 128-bit third operand */
|
||
|
} vec_triop;
|
||
|
+ struct {
|
||
|
+ HReg dst; /* 128-bit result */
|
||
|
+ HReg op1; /* 128-bit first operand */
|
||
|
+ UChar idx; /* index of element to replicate */
|
||
|
+ } vec_replicate;
|
||
|
} variant;
|
||
|
} s390_insn;
|
||
|
|
||
|
@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1,
|
||
|
HReg op2);
|
||
|
s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1,
|
||
|
HReg op2, HReg op3);
|
||
|
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx);
|
||
|
|
||
|
const HChar *s390_insn_as_string(const s390_insn *);
|
||
|
|
||
|
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
|
||
|
index ceca6836e..968122596 100644
|
||
|
--- a/VEX/priv/host_s390_isel.c
|
||
|
+++ b/VEX/priv/host_s390_isel.c
|
||
|
@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
|
||
|
}
|
||
|
/* --------- UNARY OP --------- */
|
||
|
case Iex_Unop: {
|
||
|
- UChar size_for_int_arg = 0;
|
||
|
HReg dst = INVALID_HREG;
|
||
|
HReg reg1 = INVALID_HREG;
|
||
|
s390_unop_t vec_unop = S390_UNOP_T_INVALID;
|
||
|
s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
|
||
|
IROp op = expr->Iex.Unop.op;
|
||
|
+ IROp arg_op = Iop_INVALID;
|
||
|
IRExpr* arg = expr->Iex.Unop.arg;
|
||
|
switch(op) {
|
||
|
case Iop_NotV128:
|
||
|
@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
|
||
|
}
|
||
|
|
||
|
case Iop_Dup8x16:
|
||
|
- size = size_for_int_arg = 1;
|
||
|
- vec_unop = S390_VEC_DUPLICATE;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ size = 1;
|
||
|
+ arg_op = Iop_GetElem8x16;
|
||
|
+ goto Iop_V_dup_wrk;
|
||
|
case Iop_Dup16x8:
|
||
|
- size = size_for_int_arg = 2;
|
||
|
- vec_unop = S390_VEC_DUPLICATE;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ size = 2;
|
||
|
+ arg_op = Iop_GetElem16x8;
|
||
|
+ goto Iop_V_dup_wrk;
|
||
|
case Iop_Dup32x4:
|
||
|
- size = size_for_int_arg = 4;
|
||
|
- vec_unop = S390_VEC_DUPLICATE;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ size = 4;
|
||
|
+ arg_op = Iop_GetElem32x4;
|
||
|
+ goto Iop_V_dup_wrk;
|
||
|
+
|
||
|
+ Iop_V_dup_wrk: {
|
||
|
+ dst = newVRegV(env);
|
||
|
+ if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
|
||
|
+ arg->Iex.Binop.arg2->tag == Iex_Const) {
|
||
|
+ ULong idx;
|
||
|
+ idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
|
||
|
+ reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
|
||
|
+ addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx));
|
||
|
+ } else {
|
||
|
+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
|
||
|
+ addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
|
||
|
+ }
|
||
|
+ return dst;
|
||
|
+ }
|
||
|
|
||
|
case Iop_Widen8Sto16x8:
|
||
|
size = 1;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWS;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
case Iop_Widen16Sto32x4:
|
||
|
size = 2;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWS;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
case Iop_Widen32Sto64x2:
|
||
|
size = 4;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWS;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
case Iop_Widen8Uto16x8:
|
||
|
size = 1;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWU;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
case Iop_Widen16Uto32x4:
|
||
|
size = 2;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWU;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
case Iop_Widen32Uto64x2:
|
||
|
size = 4;
|
||
|
- size_for_int_arg = 8;
|
||
|
vec_unop = S390_VEC_UNPACKLOWU;
|
||
|
- goto Iop_V_int_wrk;
|
||
|
-
|
||
|
- Iop_V_int_wrk: {
|
||
|
- HReg vr1 = vec_generate_zeroes(env);
|
||
|
- s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
|
||
|
- reg1 = s390_isel_int_expr(env, arg);
|
||
|
+ goto Iop_V_widen_wrk;
|
||
|
|
||
|
+ Iop_V_widen_wrk: {
|
||
|
vassert(vec_unop != S390_UNOP_T_INVALID);
|
||
|
- addInstr(env,
|
||
|
- s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
|
||
|
- vr1, amode2, reg1));
|
||
|
-
|
||
|
+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
|
||
|
+ HReg vr1 = newVRegV(env);
|
||
|
+ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
|
||
|
dst = newVRegV(env);
|
||
|
addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
|
||
|
return dst;
|
||
|
|
||
|
commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Thu Mar 25 18:48:07 2021 +0100
|
||
|
|
||
|
s390x: Add support for emitting "vector or with complement"
|
||
|
|
||
|
In the instruction selector, look out for IR expressions that fit "vector
|
||
|
or with complement (VOC)". Emit when applicable.
|
||
|
|
||
|
This slighly reduces the generated code sometimes, such as for certain
|
||
|
vector string instructions, where such expressions occur quite frequently.
|
||
|
|
||
|
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
|
||
|
index c764d6ef9..239d9d299 100644
|
||
|
--- a/VEX/priv/host_s390_defs.c
|
||
|
+++ b/VEX/priv/host_s390_defs.c
|
||
|
@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3)
|
||
|
return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3);
|
||
|
}
|
||
|
|
||
|
+static UChar *
|
||
|
+s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3)
|
||
|
+{
|
||
|
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
|
||
|
+ s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3);
|
||
|
+
|
||
|
+ return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3);
|
||
|
+}
|
||
|
+
|
||
|
static UChar *
|
||
|
s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3)
|
||
|
{
|
||
|
@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn)
|
||
|
case S390_VEC_PACK_SATURU: op = "v-vpacksaturu"; break;
|
||
|
case S390_VEC_COMPARE_EQUAL: op = "v-vcmpeq"; break;
|
||
|
case S390_VEC_OR: op = "v-vor"; break;
|
||
|
+ case S390_VEC_ORC: op = "v-vorc"; break;
|
||
|
case S390_VEC_XOR: op = "v-vxor"; break;
|
||
|
case S390_VEC_AND: op = "v-vand"; break;
|
||
|
case S390_VEC_MERGEL: op = "v-vmergel"; break;
|
||
|
@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
|
||
|
return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size));
|
||
|
case S390_VEC_OR:
|
||
|
return s390_emit_VO(buf, v1, v2, v3);
|
||
|
+ case S390_VEC_ORC:
|
||
|
+ return s390_emit_VOC(buf, v1, v2, v3);
|
||
|
case S390_VEC_XOR:
|
||
|
return s390_emit_VX(buf, v1, v2, v3);
|
||
|
case S390_VEC_AND:
|
||
|
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
|
||
|
index 063fd3800..dc116106e 100644
|
||
|
--- a/VEX/priv/host_s390_defs.h
|
||
|
+++ b/VEX/priv/host_s390_defs.h
|
||
|
@@ -366,6 +366,7 @@ typedef enum {
|
||
|
S390_VEC_PACK_SATURU,
|
||
|
S390_VEC_COMPARE_EQUAL,
|
||
|
S390_VEC_OR,
|
||
|
+ S390_VEC_ORC,
|
||
|
S390_VEC_XOR,
|
||
|
S390_VEC_AND,
|
||
|
S390_VEC_MERGEL,
|
||
|
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
|
||
|
index 968122596..53d76fe8a 100644
|
||
|
--- a/VEX/priv/host_s390_isel.c
|
||
|
+++ b/VEX/priv/host_s390_isel.c
|
||
|
@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
|
||
|
case Iop_OrV128:
|
||
|
size = 16;
|
||
|
vec_binop = S390_VEC_OR;
|
||
|
+ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
|
||
|
+ IRExpr* orig_arg1 = arg1;
|
||
|
+ arg1 = arg2;
|
||
|
+ arg2 = orig_arg1->Iex.Unop.arg;
|
||
|
+ vec_binop = S390_VEC_ORC;
|
||
|
+ } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
|
||
|
+ arg2 = arg2->Iex.Unop.arg;
|
||
|
+ vec_binop = S390_VEC_ORC;
|
||
|
+ }
|
||
|
goto Iop_VV_wrk;
|
||
|
|
||
|
case Iop_XorV128:
|
||
|
|
||
|
commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Tue Mar 30 17:45:20 2021 +0200
|
||
|
|
||
|
s390x: Fix/optimize Iop_64HLtoV128
|
||
|
|
||
|
In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies
|
||
|
of a 64-bit value is realized with Iop_64HLtoV128, since there is no such
|
||
|
operator as Iop_Dup64x2. But the two args to Iop_64HLtoV128 use the same
|
||
|
expression, referenced twice. Although this hasn't been seen to cause
|
||
|
real trouble yet, it's problematic and potentially inefficient, so change
|
||
|
it: Assign to a temp and pass that twice instead.
|
||
|
|
||
|
In the instruction selector, if Iop_64HLtoV128 is found to be used for a
|
||
|
duplication as above, select "v-vdup" instead of "v-vinitfromgprs". This
|
||
|
mimicks the behavior we'd get if there actually was an operator
|
||
|
Iop_Dup64x2.
|
||
|
|
||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||
|
index dfea54259..a73dcfb14 100644
|
||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||
|
@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2)
|
||
|
case Ity_I32:
|
||
|
put_vr_qw(v1, unop(Iop_Dup32x4, o2));
|
||
|
break;
|
||
|
- case Ity_I64:
|
||
|
- put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
|
||
|
+ case Ity_I64: {
|
||
|
+ IRTemp val = newTemp(Ity_I64);
|
||
|
+ assign(val, o2);
|
||
|
+ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val)));
|
||
|
break;
|
||
|
+ }
|
||
|
default:
|
||
|
ppIRType(o2type);
|
||
|
vpanic("s390_vr_fill: invalid IRType");
|
||
|
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
|
||
|
index 53d76fe8a..ee20c6711 100644
|
||
|
--- a/VEX/priv/host_s390_isel.c
|
||
|
+++ b/VEX/priv/host_s390_isel.c
|
||
|
@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
|
||
|
}
|
||
|
|
||
|
case Iop_64HLtoV128:
|
||
|
- reg1 = s390_isel_int_expr(env, arg1);
|
||
|
- reg2 = s390_isel_int_expr(env, arg2);
|
||
|
-
|
||
|
- addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
|
||
|
- dst, reg1, reg2));
|
||
|
-
|
||
|
+ if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp &&
|
||
|
+ arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) {
|
||
|
+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1);
|
||
|
+ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src));
|
||
|
+ } else {
|
||
|
+ reg1 = s390_isel_int_expr(env, arg1);
|
||
|
+ reg2 = s390_isel_int_expr(env, arg2);
|
||
|
+ addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
|
||
|
+ dst, reg1, reg2));
|
||
|
+ }
|
||
|
return dst;
|
||
|
|
||
|
default:
|
||
|
|
||
|
commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
|
||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||
|
Date: Fri May 7 18:13:03 2021 +0200
|
||
|
|
||
|
s390x: Add missing stdout.exp for vector string memcheck test
|
||
|
|
||
|
The file vistr.stdout.exp was missing from commit 32312d588. Add it.
|
||
|
|
||
|
diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp
|
||
|
new file mode 100644
|
||
|
index 000000000..e69de29bb
|