3.14.0-4 gcc ppc64le inlined memcmp vs memcheck (#1652926)
- Add valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch, valgrind-3.14.0-new-strlen-IROps.patch, valgrind-3.14.0-ppc-instr-new-IROps.patch, valgrind-3.14.0-memcheck-new-IROps.patch, valgrind-3.14.0-ppc-frontend-new-IROps.patch, valgrind-3.14.0-transform-popcount64-ctznat64.patch and valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch (#1652926)
parent 06ef44fd1a
commit b3eda9b80b
valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch (new file, 18 lines)
@@ -0,0 +1,18 @@
commit 27fe22378da38424102c5292b782cacdd9d7b9e4
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 12:09:03 2018 +0100

    Add support for Iop_{Sar,Shr}8 on ppc.  --expensive-definedness-checks=yes needs them.

diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 5242176..750cf8d 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -1528,7 +1528,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
                                             True/*32bit shift*/,
                                             tmp, tmp, amt));
             r_srcL = tmp;
-            vassert(0); /* AWAITING TEST CASE */
          }
       }
       /* Only 64 expressions need 64bit shifts,
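For reference, Iop_Sar8 is an arithmetic (sign-propagating) right shift on an 8-bit lane and Iop_Shr8 a logical one. A minimal C sketch of the intended semantics (illustrative only, not VEX code; right-shifting a negative signed value assumes the usual two's-complement compiler behaviour):

    #include <assert.h>
    #include <stdint.h>

    /* Iop_Sar8 analogue: arithmetic shift right, sign bit replicated. */
    static inline int8_t sar8(int8_t x, unsigned n) { return (int8_t)(x >> n); }

    /* Iop_Shr8 analogue: logical shift right, zeroes shifted in. */
    static inline uint8_t shr8(uint8_t x, unsigned n) { return (uint8_t)(x >> n); }

    int main(void) {
        assert(sar8((int8_t)0x80, 1) == (int8_t)0xC0);   /* sign replicated */
        assert(shr8((uint8_t)0x80, 1) == 0x40);          /* zero filled */
        return 0;
    }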
valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch (new file, 81 lines)
@@ -0,0 +1,81 @@
commit 7f1dd9d5aec1f1fd4eb0ae3a311358a914f1d73f
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 10:18:29 2018 +0100

    get_otrack_shadow_offset_wrk for ppc32 and ppc64: add missing cases for XER_OV32, XER_CA32 and C_FPCC.

    The missing cases were discovered whilst testing fixes for bug 386945, but are
    otherwise unrelated to that bug.

diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 5ed101f..4ce746e 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -120,11 +120,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    Int o = offset;
    tl_assert(sz > 0);
 
-#if defined(VGA_ppc64be)
+#  if defined(VGA_ppc64be)
    tl_assert(host_is_big_endian());
-#elif defined(VGA_ppc64le)
+#  elif defined(VGA_ppc64le)
    tl_assert(host_is_little_endian());
-#endif
+#  endif
 
    if (sz == 8 || sz == 4) {
       /* The point of this is to achieve
@@ -132,11 +132,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
             return GOF(GPRn);
          by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
       */
-#if defined(VGA_ppc64le)
+#  if defined(VGA_ppc64le)
       Int ox = o;
-#else
+#  else
       Int ox = sz == 8 ? o : (o - 4);
-#endif
+#  endif
       if (ox == GOF(GPR0)) return ox;
       if (ox == GOF(GPR1)) return ox;
       if (ox == GOF(GPR2)) return ox;
@@ -240,11 +240,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    if (o == GOF(VSR31) && sz == 8) return o;
 
    /* For the various byte sized XER/CR pieces, use offset 8
-      in VSR0 .. VSR19. */
+      in VSR0 .. VSR21. */
    tl_assert(SZB(VSR0) == 16);
    if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
    if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+   if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
    if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+   if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
    if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
 
    if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
@@ -388,6 +390,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
    if (o == GOF(FPROUND) && sz == 1) return -1;
    if (o == GOF(DFPROUND) && sz == 1) return -1;
+   if (o == GOF(C_FPCC) && sz == 1) return -1;
    if (o == GOF(VRSAVE) && sz == 4) return -1;
    if (o == GOF(EMNOTE) && sz == 4) return -1;
    if (o == GOF(CMSTART) && sz == 4) return -1;
@@ -440,11 +443,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
    if (o == GOF(VSR31) && sz == 8) return o;
 
    /* For the various byte sized XER/CR pieces, use offset 8
-      in VSR0 .. VSR19. */
+      in VSR0 .. VSR21. */
    tl_assert(SZB(VSR0) == 16);
    if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
    if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+   if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
    if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+   if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
    if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
 
    if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
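The "ox" trick quoted in the second hunk is easy to miss: on big-endian ppc64 a 4-byte access to the low half of a GPR lands 4 bytes past the start of the 8-byte slot, so stepping ox back by 4 makes it hit the slot's GOF offset. A hedged C sketch (the offsets here are made up purely for illustration; real values come from GOF(...) in mc_machine.c):

    #include <assert.h>
    #include <stdio.h>

    int main(void) {
        int GPR3   = 24;               /* pretend GOF(GPR3) == 24 (hypothetical) */
        int o_full = 24, sz_full = 8;  /* 8-byte access at the slot start */
        int o_half = 28, sz_half = 4;  /* BE 4-byte access to the low half */
        int ox1 = sz_full == 8 ? o_full : o_full - 4;
        int ox2 = sz_half == 8 ? o_half : o_half - 4;
        assert(ox1 == GPR3 && ox2 == GPR3);   /* both now match GOF(GPR3) */
        printf("both accesses map to slot offset %d\n", GPR3);
        return 0;
    }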
valgrind-3.14.0-memcheck-new-IROps.patch (new file, 453 lines)
@@ -0,0 +1,453 @@
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 11:28:42 2018 +0100

    Add Memcheck support for IROps added in 42719898.

    memcheck/mc_translate.c:

    Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.

    doCmpORD: for the cases of a signed comparison against zero, compute
    definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
    bits, do it exactly accurately.

    expensiveCountTrailingZeroes: no functional change.  Re-analyse/verify and add
    comments.

    expensiveCountLeadingZeroes: add.  Very similar to
    expensiveCountTrailingZeroes.

    Add some comments to mark unary ops which are self-shadowing.

    Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
    Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.

    Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.

    memcheck/tests/vbit-test/irops.c

    Add dummy new entries for all new IROps, just enough to make it compile and
    run.

diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 68a2ab3..c24db91 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
 }
 
+/* --------- The Right-family of operations. --------- */
+
+/* Unfortunately these are a lot more expensive then their Left
+   counterparts.  Fortunately they are only very rarely used -- only for
+   count-leading-zeroes instrumentation. */
+
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
+{
+   for (Int i = 1; i <= 16; i *= 2) {
+      // a1 |= (a1 >>u i)
+      IRAtom* tmp
+         = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
+      a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
+   }
+   return a1;
+}
+
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
+{
+   for (Int i = 1; i <= 32; i *= 2) {
+      // a1 |= (a1 >>u i)
+      IRAtom* tmp
+         = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
+      a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
+   }
+   return a1;
+}
+
 /* --------- 'Improvement' functions for AND/OR. --------- */
 
 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv* mce,
                           IRAtom* xxhash, IRAtom* yyhash,
                           IRAtom* xx, IRAtom* yy )
 {
-   Bool m64      = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
-   Bool syned    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
-   IROp opOR     = m64 ? Iop_Or64  : Iop_Or32;
-   IROp opAND    = m64 ? Iop_And64 : Iop_And32;
-   IROp opSHL    = m64 ? Iop_Shl64 : Iop_Shl32;
-   IROp opSHR    = m64 ? Iop_Shr64 : Iop_Shr32;
-   IRType ty     = m64 ? Ity_I64   : Ity_I32;
-   Int width     = m64 ? 64        : 32;
+   Bool m64      = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
+   Bool syned    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
+   IROp opOR     = m64 ? Iop_Or64   : Iop_Or32;
+   IROp opAND    = m64 ? Iop_And64  : Iop_And32;
+   IROp opSHL    = m64 ? Iop_Shl64  : Iop_Shl32;
+   IROp opSHR    = m64 ? Iop_Shr64  : Iop_Shr32;
+   IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
+   IRType ty     = m64 ? Ity_I64    : Ity_I32;
+   Int width     = m64 ? 64         : 32;
 
    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
 
-   IRAtom* threeLeft1 = NULL;
-   IRAtom* sevenLeft1 = NULL;
-
    tl_assert(isShadowAtom(mce,xxhash));
    tl_assert(isShadowAtom(mce,yyhash));
    tl_assert(isOriginalAtom(mce,xx));
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv* mce,
       /* fancy interpretation */
       /* if yy is zero, then it must be fully defined (zero#). */
       tl_assert(isZero(yyhash));
-      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
+      // This is still inaccurate, but I don't think it matters, since
+      // nobody writes code of the form
+      // "is <partially-undefined-value> signedly greater than zero?".
+      // We therefore simply declare "x >s 0" to be undefined if any bit in
+      // x is undefined.  That's clearly suboptimal in some cases.  Eg, if
+      // the highest order bit is a defined 1 then x is negative so it
+      // doesn't matter whether the remaining bits are defined or not.
+      IRAtom* t_0_gt_0_0
+         = assignNew(
+              'V', mce,ty,
+              binop(
+                 opAND,
+                 mkPCastTo(mce,ty, xxhash),
+                 m64 ? mkU64(1<<2) : mkU32(1<<2)
+              ));
+      // For "x <s 0", we can just copy the definedness of the top bit of x
+      // and we have a precise result.
+      IRAtom* t_lt_0_0_0
+         = assignNew(
+              'V', mce,ty,
+              binop(
+                 opSHL,
+                 assignNew(
+                    'V', mce,ty,
+                    binop(opSHR, xxhash, mkU8(width-1))),
+                 mkU8(3)
+              ));
+      // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
+      IRAtom* t_0_0_eq_0
+         = assignNew(
+              'V', mce,ty,
+              binop(
+                 opSHL,
+                 assignNew('V', mce,ty,
+                    unop(
+                    op1UtoWS,
+                    expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
+                 ),
+                 mkU8(1)
+              ));
       return
          binop(
             opOR,
-            assignNew(
-               'V', mce,ty,
-               binop(
-                  opAND,
-                  mkPCastTo(mce,ty, xxhash),
-                  threeLeft1
-               )),
-            assignNew(
-               'V', mce,ty,
-               binop(
-                  opSHL,
-                  assignNew(
-                     'V', mce,ty,
-                     binop(opSHR, xxhash, mkU8(width-1))),
-                  mkU8(3)
-               ))
-         );
+            assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
+            t_0_0_eq_0
+         );
    } else {
       /* standard interpretation */
-      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
+      IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
       return
          binop(
             opAND,
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
    tl_assert(sameKindedAtoms(atom,vatom));
 
    switch (czop) {
-      case Iop_Ctz32:
+      case Iop_Ctz32: case Iop_CtzNat32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
-      case Iop_Ctz64:
+      case Iop_Ctz64: case Iop_CtzNat64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
 
    // improver = atom ^ (atom - 1)
    //
-   // That is, improver has its low ctz(atom) bits equal to one;
-   // higher bits (if any) equal to zero.
+   // That is, improver has its low ctz(atom)+1 bits equal to one;
+   // higher bits (if any) equal to zero.  So it's exactly the right
+   // mask to use to remove the irrelevant undefined input bits.
+   /* Here are some examples:
+         atom   = U...U 1 0...0
+         atom-1 = U...U 0 1...1
+         ^ed    = 0...0 1 11111, which correctly describes which bits of |atom|
+                                 actually influence the result
+      A boundary case
+         atom   = 0...0
+         atom-1 = 1...1
+         ^ed    = 11111, also a correct mask for the input: all input bits
+                         are relevant
+      Another boundary case
+         atom   = 1..1 1
+         atom-1 = 1..1 0
+         ^ed    = 0..0 1, also a correct mask: only the rightmost input bit
+                          is relevant
+      Now with misc U bits interspersed:
+         atom   = U...U 1 0 U...U 0 1 0...0
+         atom-1 = U...U 1 0 U...U 0 0 1...1
+         ^ed    = 0...0 0 0 0...0 0 1 1...1, also correct
+      (Per re-check/analysis of 14 Nov 2018)
+   */
    improver = assignNew('V', mce,ty,
                         binop(xorOp,
                               atom,
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
 
    // improved = vatom & improver
    //
-   // That is, treat any V bits above the first ctz(atom) bits as
-   // "defined".
+   // That is, treat any V bits to the left of the rightmost ctz(atom)+1
+   // bits as "defined".
+   improved = assignNew('V', mce, ty,
+                        binop(andOp, vatom, improver));
+
+   // Return pessimizing cast of improved.
+   return mkPCastTo(mce, ty, improved);
+}
+
+static
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
+                                      IRAtom* atom, IRAtom* vatom )
+{
+   IRType ty;
+   IROp shrOp, notOp, andOp;
+   IRAtom* (*mkRight)(MCEnv*, IRAtom*);
+   IRAtom *improver, *improved;
+   tl_assert(isShadowAtom(mce,vatom));
+   tl_assert(isOriginalAtom(mce,atom));
+   tl_assert(sameKindedAtoms(atom,vatom));
+
+   switch (czop) {
+      case Iop_Clz32: case Iop_ClzNat32:
+         ty = Ity_I32;
+         shrOp = Iop_Shr32;
+         notOp = Iop_Not32;
+         andOp = Iop_And32;
+         mkRight = mkRight32;
+         break;
+      case Iop_Clz64: case Iop_ClzNat64:
+         ty = Ity_I64;
+         shrOp = Iop_Shr64;
+         notOp = Iop_Not64;
+         andOp = Iop_And64;
+         mkRight = mkRight64;
+         break;
+      default:
+         ppIROp(czop);
+         VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
+   }
+
+   // This is in principle very similar to how expensiveCountTrailingZeroes
+   // works.  That function computed an "improver", which it used to mask
+   // off all but the rightmost 1-bit and the zeroes to the right of it,
+   // hence removing irrelevant bits from the input.  Here, we play the
+   // exact same game but with the left-vs-right roles interchanged.
+   // Unfortunately calculation of the improver in this case is
+   // significantly more expensive.
+   //
+   // improver = ~(RIGHT(atom) >>u 1)
+   //
+   // That is, improver has its upper clz(atom)+1 bits equal to one;
+   // lower bits (if any) equal to zero.  So it's exactly the right
+   // mask to use to remove the irrelevant undefined input bits.
+   /* Here are some examples:
+         atom             = 0...0 1 U...U
+         R(atom)          = 0...0 1 1...1
+         R(atom) >>u 1    = 0...0 0 1...1
+         ~(R(atom) >>u 1) = 1...1 1 0...0
+                            which correctly describes which bits of |atom|
+                            actually influence the result
+      A boundary case
+         atom             = 0...0
+         R(atom)          = 0...0
+         R(atom) >>u 1    = 0...0
+         ~(R(atom) >>u 1) = 1...1
+                            also a correct mask for the input: all input bits
+                            are relevant
+      Another boundary case
+         atom             = 1 1..1
+         R(atom)          = 1 1..1
+         R(atom) >>u 1    = 0 1..1
+         ~(R(atom) >>u 1) = 1 0..0
+                            also a correct mask: only the leftmost input bit
+                            is relevant
+      Now with misc U bits interspersed:
+         atom             = 0...0 1 U...U 0 1 U...U
+         R(atom)          = 0...0 1 1...1 1 1 1...1
+         R(atom) >>u 1    = 0...0 0 1...1 1 1 1...1
+         ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
+      (Per initial implementation of 15 Nov 2018)
+   */
+   improver = mkRight(mce, atom);
+   improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
+   improver = assignNew('V', mce, ty, unop(notOp, improver));
+
+   // improved = vatom & improver
+   //
+   // That is, treat any V bits to the right of the leftmost clz(atom)+1
+   // bits as "defined".
    improved = assignNew('V', mce, ty,
                         binop(andOp, vatom, improver));
 
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
       case Iop_RecipEst32F0x4:
          return unary32F0x4(mce, vatom);
 
+      // These are self-shadowing.
       case Iop_32UtoV128:
       case Iop_64UtoV128:
       case Iop_Dup8x16:
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
       case Iop_MulI128by10Carry:
      case Iop_F16toF64x2:
      case Iop_F64toF16x2:
+         // FIXME JRS 2018-Nov-15.  This is surely not correct!
         return vatom;
 
      case Iop_I32StoF128: /* signed I32 -> F128 */
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:
-      case Iop_Clz64:
      case Iop_D32toD64:
      case Iop_I32StoD64:
      case Iop_I32UtoD64:
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_D64toD128:
         return mkPCastTo(mce, Ity_I128, vatom);
 
-      case Iop_Clz32:
      case Iop_TruncF64asF32:
      case Iop_NegF32:
      case Iop_AbsF32:
      case Iop_F16toF32:
         return mkPCastTo(mce, Ity_I32, vatom);
 
-      case Iop_Ctz32:
-      case Iop_Ctz64:
+      case Iop_Ctz32: case Iop_CtzNat32:
+      case Iop_Ctz64: case Iop_CtzNat64:
         return expensiveCountTrailingZeroes(mce, op, atom, vatom);
 
+      case Iop_Clz32: case Iop_ClzNat32:
+      case Iop_Clz64: case Iop_ClzNat64:
+         return expensiveCountLeadingZeroes(mce, op, atom, vatom);
+
+      // PopCount32: this is slightly pessimistic.  It is true that the
+      // result depends on all input bits, so that aspect of the PCast is
+      // correct.  However, regardless of the input, only the lowest 5 bits
+      // out of the output can ever be undefined.  So we could actually
+      // "improve" the results here by marking the top 27 bits of output as
+      // defined.  A similar comment applies for PopCount64.
+      case Iop_PopCount32:
+         return mkPCastTo(mce, Ity_I32, vatom);
+      case Iop_PopCount64:
+         return mkPCastTo(mce, Ity_I64, vatom);
+
+      // These are self-shadowing.
      case Iop_1Uto64:
      case Iop_1Sto64:
      case Iop_8Uto64:
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));
 
+      // These are self-shadowing.
      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_16Sto32:
      case Iop_8Sto32:
      case Iop_V128to32:
+      case Iop_Reverse8sIn32_x1:
         return assignNew('V', mce, Ity_I32, unop(op, vatom));
 
+      // These are self-shadowing.
      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));
 
+      // These are self-shadowing.
      case Iop_1Uto8:
      case Iop_1Sto8:
      case Iop_16to8:
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
+         // FIXME JRS 2018-Nov-15.  This is surely not correct!
         return vatom;
 
      case Iop_CmpNEZ8x8:
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
      case Iop_Ctz64x2:
         return mkPCast64x2(mce, vatom);
 
+      // This is self-shadowing.
      case Iop_PwBitMtxXpose64x2:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));
 
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
index bfd82fc..e8bf67d 100644
--- a/memcheck/tests/vbit-test/irops.c
+++ b/memcheck/tests/vbit-test/irops.c
@@ -111,6 +111,12 @@ static irop_t irops[] = {
   { DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
   { DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
   { DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+  { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
+  { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+  { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+  { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+  { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+  { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
   { DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
   { DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
   { DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
@@ -336,6 +342,7 @@ static irop_t irops[] = {
   { DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
   { DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
   { DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
+  { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
   /* ------------------ 64-bit SIMD FP ------------------------ */
   { DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
   { DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },
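The two "improver" masks documented in the hunks above can be sanity-checked in ordinary C. A minimal sketch, using plain integers instead of VEX IR atoms (illustration only): x ^ (x-1) keeps exactly the low ctz(x)+1 bits, and ~(right_smear(x) >> 1) keeps exactly the high clz(x)+1 bits.

    #include <assert.h>
    #include <stdint.h>

    /* Analogue of mkRight32: smear the topmost 1 bit rightwards. */
    static uint32_t right_smear32(uint32_t x) {
        for (int i = 1; i <= 16; i *= 2) x |= x >> i;
        return x;
    }

    int main(void) {
        uint32_t x = 0x000AD000u;                  /* ctz = 12, clz = 12 */
        uint32_t trail = x ^ (x - 1);              /* low ctz(x)+1 bits set */
        uint32_t lead  = ~(right_smear32(x) >> 1); /* high clz(x)+1 bits set */
        assert(trail == 0x00001FFFu);              /* 13 low bits */
        assert(lead  == 0xFFF80000u);              /* 13 high bits */
        return 0;
    }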
valgrind-3.14.0-new-strlen-IROps.patch (new file, 124 lines)
@@ -0,0 +1,124 @@
commit 4271989815b5fc933c1e29bc75507c2726dc3738
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 10:52:33 2018 +0100

    Add some new IROps to support improved Memcheck analysis of strlen etc.

    This is part of the fix for bug 386945.  It adds the following IROps, plus
    their supporting type- and printing- fragments:

    Iop_Reverse8sIn32_x1: 32-bit byteswap.  A fancy name, but it is consistent
    with naming for the other swapping IROps that already exist.

    Iop_PopCount64, Iop_PopCount32: population count

    Iop_ClzNat64, Iop_ClzNat32, Iop_CtzNat64, Iop_CtzNat32: counting leading and
    trailing zeroes, with "natural" (Nat) semantics for a zero input, meaning, in
    the case of zero input, return the number of bits in the word.  These
    functionally overlap with the existing Iop_Clz64, Iop_Clz32, Iop_Ctz64,
    Iop_Ctz32.  The existing operations are undefined in case of a zero input.
    Adding these new variants avoids the complexity of having to change the
    declared semantics of the existing operations.  Instead they are deprecated
    but still available for use.

diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 823b6be..3221033 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -194,6 +194,14 @@ void ppIROp ( IROp op )
       case Iop_Ctz64: vex_printf("Ctz64"); return;
       case Iop_Ctz32: vex_printf("Ctz32"); return;
 
+      case Iop_ClzNat64: vex_printf("ClzNat64"); return;
+      case Iop_ClzNat32: vex_printf("ClzNat32"); return;
+      case Iop_CtzNat64: vex_printf("CtzNat64"); return;
+      case Iop_CtzNat32: vex_printf("CtzNat32"); return;
+
+      case Iop_PopCount64: vex_printf("PopCount64"); return;
+      case Iop_PopCount32: vex_printf("PopCount32"); return;
+
       case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
       case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
       case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
@@ -395,6 +403,7 @@ void ppIROp ( IROp op )
 
       case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
       case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
+      case Iop_Reverse8sIn32_x1: vex_printf("Reverse8sIn32_x1"); return;
 
       case Iop_CmpF64: vex_printf("CmpF64"); return;
 
@@ -2719,6 +2728,7 @@ void typeOfPrimop ( IROp op,
          UNARY(Ity_I16, Ity_I16);
       case Iop_Not32:
       case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
+      case Iop_Reverse8sIn32_x1:
          UNARY(Ity_I32, Ity_I32);
 
       case Iop_Not64:
@@ -2782,9 +2792,13 @@ void typeOfPrimop ( IROp op,
          BINARY(Ity_I64,Ity_I64, Ity_I128);
 
       case Iop_Clz32: case Iop_Ctz32:
+      case Iop_ClzNat32: case Iop_CtzNat32:
+      case Iop_PopCount32:
         UNARY(Ity_I32, Ity_I32);
 
       case Iop_Clz64: case Iop_Ctz64:
+      case Iop_ClzNat64: case Iop_CtzNat64:
+      case Iop_PopCount64:
        UNARY(Ity_I64, Ity_I64);
 
       case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 17bcb55..93fa5ac 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -452,12 +452,21 @@ typedef
       Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
       Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
 
-      /* Wierdo integer stuff */
+      /* Counting bits */
+      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of zero.
+         You must ensure they are never given a zero argument.  As of
+         2018-Nov-14 they are deprecated.  Try to use the Nat variants
+         immediately below, if you can.
+      */
       Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
       Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
-      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
-         zero.  You must ensure they are never given a zero argument.
-      */
+      /* Count leading/trailing zeroes, with "natural" semantics for the
+         case where the input is zero: then the result is the number of bits
+         in the word. */
+      Iop_ClzNat64, Iop_ClzNat32,
+      Iop_CtzNat64, Iop_CtzNat32,
+      /* Population count -- compute the number of 1 bits in the argument. */
+      Iop_PopCount64, Iop_PopCount32,
 
       /* Standard integer comparisons */
       Iop_CmpLT32S, Iop_CmpLT64S,
@@ -831,6 +840,9 @@ typedef
       /* MISC (vector integer cmp != 0) */
       Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
 
+      /* Byte swap in a 32-bit word */
+      Iop_Reverse8sIn32_x1,
+
       /* ------------------ 64-bit SIMD FP ------------------------ */
 
       /* Convertion to/from int */
@@ -1034,8 +1046,9 @@ typedef
       Iop_Slice64,  // (I64, I64, I8) -> I64
 
       /* REVERSE the order of chunks in vector lanes.  Chunks must be
-         smaller than the vector lanes (obviously) and so may be 8-,
-         16- and 32-bit in size. */
+         smaller than the vector lanes (obviously) and so may be 8-, 16- and
+         32-bit in size.  Note that the degenerate case,
+         Iop_Reverse8sIn64_x1, is a simply a vanilla byte-swap. */
       /* Examples:
             Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
             Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
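A compact C reference for the newly declared semantics, assuming GCC/Clang __builtin_* intrinsics (illustration only; the IROps themselves are architecture-neutral):

    #include <assert.h>
    #include <stdint.h>

    /* CtzNat32/ClzNat32: like Ctz32/Clz32 but defined for a zero input,
       where they return the number of bits in the word. */
    static inline uint32_t ctz_nat32(uint32_t x) { return x ? (uint32_t)__builtin_ctz(x) : 32; }
    static inline uint32_t clz_nat32(uint32_t x) { return x ? (uint32_t)__builtin_clz(x) : 32; }

    /* PopCount64: number of 1 bits in the argument. */
    static inline uint64_t popcount64(uint64_t x) { return (uint64_t)__builtin_popcountll(x); }

    int main(void) {
        assert(ctz_nat32(0) == 32 && clz_nat32(0) == 32);  /* the "Nat" cases */
        assert(ctz_nat32(8) == 3 && clz_nat32(8) == 28);
        assert(popcount64(0xFFull) == 8);
        return 0;
    }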
valgrind-3.14.0-ppc-frontend-new-IROps.patch (new file, 381 lines)
@@ -0,0 +1,381 @@
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 11:36:53 2018 +0100

    ppc front end: use new IROps added in 42719898.

    This pertains to bug 386945.

    VEX/priv/guest_ppc_toIR.c:

    gen_POPCOUNT: use Iop_PopCount{32,64} where possible.

    gen_vpopcntd_mode32: use Iop_PopCount32.

    for cnttz{w,d}, use Iop_CtzNat{32,64}.

    gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.

    verbose_Clz32: remove (was unused anyway).

diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index cb1cae1..8977d4f 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -1595,7 +1595,8 @@ typedef enum {
 /* Generate an IR sequence to do a popcount operation on the supplied
    IRTemp, and return a new IRTemp holding the result.  'ty' may be
    Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
+                             _popcount_data_type data_type )
 {
    /* Do count across 2^data_type bits,
       byte:        data_type = 3
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_typ
 
    vassert(ty == Ity_I64 || ty == Ity_I32);
 
+   // Use a single IROp in cases where we can.
+
+   if (ty == Ity_I64 && data_type == DWORD) {
+      IRTemp res = newTemp(Ity_I64);
+      assign(res, unop(Iop_PopCount64, mkexpr(src)));
+      return res;
+   }
+
+   if (ty == Ity_I32 && data_type == WORD) {
+      IRTemp res = newTemp(Ity_I32);
+      assign(res, unop(Iop_PopCount32, mkexpr(src)));
+      return res;
+   }
+
+   // For the rest, we have to do it the slow way.
+
    if (ty == Ity_I32) {
 
       for (idx = 0; idx < WORD; idx++) {
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_typ
          return nyu;
       }
 
-// else, ty == Ity_I64
+      // else, ty == Ity_I64
       vassert(mode64);
 
       for (i = 0; i < DWORD; i++) {
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_typ
 */
 static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
 {
-   Int i, shift[6];
-   IRTemp mask[6];
-   IRTemp old = IRTemp_INVALID;
-   IRTemp nyu1 = IRTemp_INVALID;
-   IRTemp nyu2 = IRTemp_INVALID;
    IRTemp retval = newTemp(Ity_I64);
 
    vassert(!mode64);
 
-   for (i = 0; i < WORD; i++) {
-      mask[i]  = newTemp(Ity_I32);
-      shift[i] = 1 << i;
-   }
-   assign(mask[0], mkU32(0x55555555));
-   assign(mask[1], mkU32(0x33333333));
-   assign(mask[2], mkU32(0x0F0F0F0F));
-   assign(mask[3], mkU32(0x00FF00FF));
-   assign(mask[4], mkU32(0x0000FFFF));
-   old = src1;
-   for (i = 0; i < WORD; i++) {
-      nyu1 = newTemp(Ity_I32);
-      assign(nyu1,
-             binop(Iop_Add32,
-                   binop(Iop_And32,
-                         mkexpr(old),
-                         mkexpr(mask[i])),
-                   binop(Iop_And32,
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
-                         mkexpr(mask[i]))));
-      old = nyu1;
-   }
-
-   old = src2;
-   for (i = 0; i < WORD; i++) {
-      nyu2 = newTemp(Ity_I32);
-      assign(nyu2,
-             binop(Iop_Add32,
-                   binop(Iop_And32,
-                         mkexpr(old),
-                         mkexpr(mask[i])),
-                   binop(Iop_And32,
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
-                         mkexpr(mask[i]))));
-      old = nyu2;
-   }
-   assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+   assign(retval,
+          unop(Iop_32Uto64,
+               binop(Iop_Add32,
+                     unop(Iop_PopCount32, mkexpr(src1)),
+                     unop(Iop_PopCount32, mkexpr(src2)))));
    return retval;
 }
 
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
                rA_address, rS_address);
 
           assign( rS, getIReg( rS_address ) );
-          assign( result, unop( Iop_Ctz32,
+          assign( result, unop( Iop_CtzNat32,
                                 unop( Iop_64to32, mkexpr( rS ) ) ) );
           assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
 
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
                rA_address, rS_address);
 
           assign( rS, getIReg( rS_address ) );
-          assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
+          assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
 
           if ( flag_rC == 1 )
              set_CR0( mkexpr( rA ) );
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
    IRTemp rS = newTemp(ty);
    IRTemp rA = newTemp(ty);
    IRTemp rB = newTemp(ty);
-   IRExpr* irx;
    Bool do_rc = False;
 
    assign( rS, getIReg(rS_addr) );
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
       break;
 
    case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
-      IRExpr* lo32;
      if (rB_addr!=0) {
         vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
         return False;
      }
-      DIP("cntlzw%s r%u,r%u\n",
-          flag_rC ? ".":"", rA_addr, rS_addr);
+      DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
 
      // mode64: count in low word only
-      lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
-
-      // Iop_Clz32 undefined for arg==0, so deal with that case:
-      irx =  binop(Iop_CmpNE32, lo32, mkU32(0));
-      assign(rA, mkWidenFrom32(ty,
-                         IRExpr_ITE( irx,
-                                     unop(Iop_Clz32, lo32),
-                                     mkU32(32)),
-                         False));
-
-      // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+      IRExpr* lo32  = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+      IRExpr* res32 = unop(Iop_ClzNat32, lo32);
+      assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
      break;
    }
 
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
         vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
         return False;
      }
-      DIP("cntlzd%s r%u,r%u\n",
-          flag_rC ? ".":"", rA_addr, rS_addr);
-      // Iop_Clz64 undefined for arg==0, so deal with that case:
-      irx =  binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
-      assign(rA, IRExpr_ITE( irx,
-                             unop(Iop_Clz64, mkexpr(rS)),
-                             mkU64(64) ));
-      // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+      DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+      assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
      break;
 
    case 0x1FC: // cmpb (Power6: compare bytes)
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
      putFReg( rS_addr, mkexpr(frA));
      return True;
    }
-   case 0x1FA: // popcntd (population count doubleword
+   case 0x1FA: // popcntd (population count doubleword)
    {
+      vassert(mode64);
      DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
      IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
      putIReg( rA_addr, mkexpr(result) );
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
 static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
 {
    vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
-   return
-      binop(Iop_Or32,
-            binop(Iop_Shl32, mkexpr(t), mkU8(24)),
-      binop(Iop_Or32,
-            binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
-                  mkU32(0x00FF0000)),
-      binop(Iop_Or32,
-            binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
-                  mkU32(0x0000FF00)),
-            binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
-                  mkU32(0x000000FF) )
-      )));
+   return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
 }
 
 /* Generates code to swap the byte order in the lower half of an Ity_I32,
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
 
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
       {
+         // JRS FIXME:
+         // * is the host_endness conditional below actually necessary?
+         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
+         //   That would be a lot more efficient.
         IRExpr * nextAddr;
         IRTemp w3 = newTemp( Ity_I32 );
         IRTemp w4 = newTemp( Ity_I32 );
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
       case 0x7C3: // vpopcntd
       {
          if (mode64) {
-            /* Break vector into 64-bit double words and do the population count
-             * on each double word.
+            /* Break vector into 64-bit double words and do the population
+               count on each double word.
              */
            IRType ty = Ity_I64;
           IRTemp bits0_63 = newTemp(Ity_I64);
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
                      mkexpr( cnt_bits0_63 ) ) );
         } else {
            /* Break vector into 32-bit words and do the population count
-             * on each doubleword.
+               on each 32-bit word.
             */
           IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
           bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
-           IRTemp cnt_bits0_63 = newTemp(Ity_I64);
+           IRTemp cnt_bits0_63   = newTemp(Ity_I64);
           IRTemp cnt_bits64_127 = newTemp(Ity_I64);
 
           DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
-           breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+           breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
+                           &bits32_63, &bits0_31 );
 
           cnt_bits0_63   = gen_vpopcntd_mode32(bits0_31, bits32_63);
           cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
 
       /* Miscellaneous ISA 2.06 instructions */
       case 0x1FA: // popcntd
+         if (!mode64) goto decode_failure;
+         /* else fallthru */
       case 0x17A: // popcntw
       case 0x7A:  // popcntb
-          if (dis_int_logic( theInstr )) goto decode_success;
-          goto decode_failure;
+         if (dis_int_logic( theInstr )) goto decode_success;
+         goto decode_failure;
 
       case 0x0FC: // bpermd
          if (!mode64) goto decode_failure;
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
    return dres;
 }
 
-
-/*------------------------------------------------------------*/
-/*--- Unused stuff                                          ---*/
-/*------------------------------------------------------------*/
-
-///* A potentially more memcheck-friendly implementation of Clz32, with
-//   the boundary case Clz32(0) = 32, which is what ppc requires. */
-//
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
-//{
-//   /* Welcome ... to SSA R Us. */
-//   IRTemp n1  = newTemp(Ity_I32);
-//   IRTemp n2  = newTemp(Ity_I32);
-//   IRTemp n3  = newTemp(Ity_I32);
-//   IRTemp n4  = newTemp(Ity_I32);
-//   IRTemp n5  = newTemp(Ity_I32);
-//   IRTemp n6  = newTemp(Ity_I32);
-//   IRTemp n7  = newTemp(Ity_I32);
-//   IRTemp n8  = newTemp(Ity_I32);
-//   IRTemp n9  = newTemp(Ity_I32);
-//   IRTemp n10 = newTemp(Ity_I32);
-//   IRTemp n11 = newTemp(Ity_I32);
-//   IRTemp n12 = newTemp(Ity_I32);
-//
-//   /* First, propagate the most significant 1-bit into all lower
-//      positions in the word. */
-//   /* unsigned int clz ( unsigned int n )
-//      {
-//         n |= (n >> 1);
-//         n |= (n >> 2);
-//         n |= (n >> 4);
-//         n |= (n >> 8);
-//         n |= (n >> 16);
-//         return bitcount(~n);
-//      }
-//   */
-//   assign(n1, mkexpr(arg));
-//   assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
-//   assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
-//   assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
-//   assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
-//   assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
-//   /* This gives a word of the form 0---01---1.  Now invert it, giving
-//      a word of the form 1---10---0, then do a population-count idiom
-//      (to count the 1s, which is the number of leading zeroes, or 32
-//      if the original word was 0. */
-//   assign(n7, unop(Iop_Not32, mkexpr(n6)));
-//
-//   /* unsigned int bitcount ( unsigned int n )
-//      {
-//         n = n - ((n >> 1) & 0x55555555);
-//         n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
-//         n = (n + (n >> 4)) & 0x0F0F0F0F;
-//         n = n + (n >> 8);
-//         n = (n + (n >> 16)) & 0x3F;
-//         return n;
-//      }
-//   */
-//   assign(n8,
-//          binop(Iop_Sub32,
-//                mkexpr(n7),
-//                binop(Iop_And32,
-//                      binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
-//                      mkU32(0x55555555))));
-//   assign(n9,
-//          binop(Iop_Add32,
-//                binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
-//                binop(Iop_And32,
-//                      binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
-//                      mkU32(0x33333333))));
-//   assign(n10,
-//          binop(Iop_And32,
-//                binop(Iop_Add32,
-//                      mkexpr(n9),
-//                      binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
-//                mkU32(0x0F0F0F0F)));
-//   assign(n11,
-//          binop(Iop_Add32,
-//                mkexpr(n10),
-//                binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
-//   assign(n12,
-//          binop(Iop_Add32,
-//                mkexpr(n11),
-//                binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
-//   return
-//      binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
-//}
-
 /*--------------------------------------------------------------------*/
 /*--- end                                       guest_ppc_toIR.c ---*/
 /*--------------------------------------------------------------------*/
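The removed mask-and-shift sequence in gen_byterev32 and the new single Iop_Reverse8sIn32_x1 compute the same thing: a 32-bit byte swap. A C rendering of that exact sequence (illustrative only; the authoritative semantics are in libvex_ir.h):

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the old gen_byterev32 IR: ABCD -> DCBA. */
    static inline uint32_t reverse8s_in32(uint32_t t) {
        return (t << 24)
             | ((t << 8)  & 0x00FF0000u)
             | ((t >> 8)  & 0x0000FF00u)
             | ((t >> 24) & 0x000000FFu);
    }

    int main(void) {
        assert(reverse8s_in32(0xAABBCCDDu) == 0xDDCCBBAAu);
        return 0;
    }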
valgrind-3.14.0-ppc-instr-new-IROps.patch (new file, 257 lines)
@@ -0,0 +1,257 @@
commit 97d336b79e36f6c99d8b07f49ebc9b780e6df84e
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 11:07:37 2018 +0100

    Add ppc host-side isel and instruction support for IROps added in previous commit.

    VEX/priv/host_ppc_defs.c, VEX/priv/host_ppc_defs.h:

    Dont emit cnttz{w,d}.  We may need them on a target which doesn't support
    them.  Instead we can generate a fairly reasonable alternative sequence with
    cntlz{w,d} instead.

    Add support for emitting popcnt{w,d}.

    VEX/priv/host_ppc_isel.c

    Add support for: Iop_ClzNat32 Iop_ClzNat64

    Redo support for: Iop_Ctz{32,64} and their Nat equivalents, so as to not use
    cnttz{w,d}, as mentioned above.

    Add support for: Iop_PopCount64 Iop_PopCount32 Iop_Reverse8sIn32_x1

diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index b073c1d..f4b52e4 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -501,9 +501,9 @@ const HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
    case Pun_NEG:   return "neg";
    case Pun_CLZ32: return "cntlzw";
    case Pun_CLZ64: return "cntlzd";
-   case Pun_CTZ32: return "cnttzw";
-   case Pun_CTZ64: return "cnttzd";
    case Pun_EXTSW: return "extsw";
+   case Pun_POP32: return "popcntw";
+   case Pun_POP64: return "popcntd";
    default: vpanic("showPPCUnaryOp");
    }
 }
@@ -4265,20 +4265,19 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
          vassert(mode64);
          p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
          break;
-      case Pun_CTZ32:  // cnttzw r_dst, r_src
-         /* Note oder of src and dst is backwards from normal */
-         p = mkFormX(p, 31, r_src, r_dst, 0, 538, 0, endness_host);
-         break;
-      case Pun_CTZ64:  // cnttzd r_dst, r_src
-         /* Note oder of src and dst is backwards from normal */
-         vassert(mode64);
-         p = mkFormX(p, 31, r_src, r_dst, 0, 570, 0, endness_host);
-         break;
       case Pun_EXTSW:  // extsw r_dst, r_src
          vassert(mode64);
          p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
          break;
-      default: goto bad;
+      case Pun_POP32:  // popcntw r_dst, r_src
+         p = mkFormX(p, 31, r_src, r_dst, 0, 378, 0, endness_host);
+         break;
+      case Pun_POP64:  // popcntd r_dst, r_src
+         vassert(mode64);
+         p = mkFormX(p, 31, r_src, r_dst, 0, 506, 0, endness_host);
+         break;
+      default:
+         goto bad;
       }
       goto done;
    }
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 17baff5..321fba9 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -291,9 +291,9 @@ typedef
       Pun_NOT,
       Pun_CLZ32,
       Pun_CLZ64,
-      Pun_CTZ32,
-      Pun_CTZ64,
-      Pun_EXTSW
+      Pun_EXTSW,
+      Pun_POP32, // popcntw
+      Pun_POP64  // popcntd
    }
    PPCUnaryOp;
 
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 6bdb5f7..5242176 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2065,12 +2065,15 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
            return r_dst;
         }
         break;
-      case Iop_Clz32:
-      case Iop_Clz64: {
+
+      case Iop_Clz32: case Iop_ClzNat32:
+      case Iop_Clz64: case Iop_ClzNat64: {
+         // cntlz is available even in the most basic (earliest) ppc
+         // variants, so it's safe to generate it unconditionally.
         HReg r_src, r_dst;
-         PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
-                                                      Pun_CLZ64;
-         if (op_unop == Iop_Clz64 && !mode64)
+         PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
+                                ? Pun_CLZ32 : Pun_CLZ64;
+         if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
            goto irreducible;
         /* Count leading zeroes. */
         r_dst = newVRegI(env);
@@ -2079,18 +2082,133 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
         return r_dst;
      }
 
-      case Iop_Ctz32:
-      case Iop_Ctz64: {
-         HReg r_src, r_dst;
-         PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
-                                                      Pun_CTZ64;
-         if (op_unop == Iop_Ctz64 && !mode64)
-            goto irreducible;
-         /* Count trailing zeroes. */
-         r_dst = newVRegI(env);
-         r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
-         addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
-         return r_dst;
+      //case Iop_Ctz32:
+      case Iop_CtzNat32:
+      //case Iop_Ctz64:
+      case Iop_CtzNat64:
+      {
+         // Generate code using Clz, because we can't assume the host has
+         // Ctz.  In particular, part of the fix for bug 386945 involves
+         // creating a Ctz in ir_opt.c from smaller fragments.
+         PPCUnaryOp op_clz = Pun_CLZ64;
+         Int WS = 64;
+         if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
+            op_clz = Pun_CLZ32;
+            WS = 32;
+         }
+         /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
+            t1 = arg - 1
+            t2 = not arg
+            t2 = t2 & t1
+            t2 = clz t2
+            t1 = WS
+            t2 = t1 - t2
+            // result in t2
+         */
+         HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         HReg t1 = newVRegI(env);
+         HReg t2 = newVRegI(env);
+         addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
+         addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
+         addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
+         addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
+         addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
+         return t2;
+      }
+
+      case Iop_PopCount64: {
+         // popcnt{x,d} is only available in later arch revs (ISA 3.0,
+         // maybe) so it's not really correct to emit it here without a caps
+         // check for the host.
+         if (mode64) {
+            HReg r_dst = newVRegI(env);
+            HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+            addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
+            return r_dst;
+         }
+         // We don't expect to be required to handle this in 32-bit mode.
+         break;
+      }
+
+      case Iop_PopCount32: {
+         // Similar comment as for Ctz just above applies -- we really
+         // should have a caps check here.
+
+         HReg r_dst = newVRegI(env);
+         // This actually generates popcntw, which in 64 bit mode does a
+         // 32-bit count individually for both low and high halves of the
+         // word.  Per the comment at the top of iselIntExpr_R, in the 64
+         // bit mode case, the user of this result is required to ignore
+         // the upper 32 bits of the result.  In 32 bit mode this is all
+         // moot.  It is however unclear from the PowerISA 3.0 docs that
+         // the instruction exists in 32 bit mode; however our own front
+         // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
+         return r_dst;
+      }
+
+      case Iop_Reverse8sIn32_x1: {
+         // A bit of a mouthful, but simply .. 32-bit byte swap.
+         // This is pretty rubbish code.  We could do vastly better if
+         // rotates, and better, rotate-inserts, were allowed.  Note that
+         // even on a 64 bit target, the right shifts must be done as 32-bit
+         // so as to introduce zero bits in the right places.  So it seems
+         // simplest to do the whole sequence in 32-bit insns.
+         /*
+            r = <argument>  // working temporary, initial byte order ABCD
+            Mask = 00FF00FF
+            nMask = not Mask
+            tHi = and r, Mask
+            tHi = shl tHi, 8
+            tLo = and r, nMask
+            tLo = shr tLo, 8
+            r = or tHi, tLo  // now r has order BADC
+            and repeat for 16 bit chunks ..
+            Mask = 0000FFFF
+            nMask = not Mask
+            tHi = and r, Mask
+            tHi = shl tHi, 16
+            tLo = and r, nMask
+            tLo = shr tLo, 16
+            r = or tHi, tLo  // now r has order DCBA
+         */
+         HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         HReg rr     = newVRegI(env);
+         HReg rMask  = newVRegI(env);
+         HReg rnMask = newVRegI(env);
+         HReg rtHi   = newVRegI(env);
+         HReg rtLo   = newVRegI(env);
+         // Copy r_src since we need to modify it
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
+         // Swap within 16-bit lanes
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
+                                   False/* !64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+         // And now swap the two 16-bit chunks
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
+                                   False/* !64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+         return rr;
       }
 
       case Iop_Left8:
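The identity the new isel sequence relies on, ctz(x) = wordsize - clz(~x & (x - 1)), is easy to sanity-check in C, including the x == 0 boundary where cntlzw/cntlzd naturally return the word size. A sketch assuming GCC/Clang builtins (illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* clz with the "natural" clz(0) == 32 semantics of cntlzw. */
    static uint32_t clz_nat32(uint32_t x) { return x ? (uint32_t)__builtin_clz(x) : 32; }

    /* ctz synthesised from clz, exactly as the isel does it. */
    static uint32_t ctz_via_clz32(uint32_t x) { return 32 - clz_nat32(~x & (x - 1)); }

    int main(void) {
        assert(ctz_via_clz32(0x80u) == 7);
        assert(ctz_via_clz32(1u)    == 0);
        assert(ctz_via_clz32(0u)    == 32);   /* CtzNat semantics */
        return 0;
    }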
valgrind-3.14.0-transform-popcount64-ctznat64.patch (new file, 82 lines)
@@ -0,0 +1,82 @@
commit cb5d7e047598bff6d0f1d707a70d9fb1a1c7f0e2
Author: Julian Seward <jseward@acm.org>
Date:   Tue Nov 20 11:46:55 2018 +0100

    VEX/priv/ir_opt.c

    fold_Expr: transform PopCount64(And64(Add64(x,-1),Not64(x))) into CtzNat64(x).

    This is part of the fix for bug 386945.

diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index f40870b..23964be 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -1377,6 +1377,8 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
    case Iex_Unop:
       /* UNARY ops */
       if (e->Iex.Unop.arg->tag == Iex_Const) {
+
+         /* cases where the arg is a const */
          switch (e->Iex.Unop.op) {
          case Iop_1Uto8:
             e2 = IRExpr_Const(IRConst_U8(toUChar(
@@ -1690,8 +1692,56 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
 
          default:
             goto unhandled;
-         }
-      }
+         } // switch (e->Iex.Unop.op)
+
+      } else {
+
+         /* other cases (identities, etc) */
+         switch (e->Iex.Unop.op) {
+         case Iop_PopCount64: {
+            // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
+            // bindings:
+            //   a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
+            IRExpr* a1 = chase(env, e->Iex.Unop.arg);
+            if (!a1)
+               goto nomatch;
+            if (a1->tag != Iex_Binop || a1->Iex.Binop.op != Iop_And64)
+               goto nomatch;
+            // a1 is established
+            IRExpr* a11 = chase(env, a1->Iex.Binop.arg1);
+            if (!a11)
+               goto nomatch;
+            if (a11->tag != Iex_Binop || a11->Iex.Binop.op != Iop_Add64)
+               goto nomatch;
+            // a11 is established
+            IRExpr* a12 = chase(env, a1->Iex.Binop.arg2);
+            if (!a12)
+               goto nomatch;
+            if (a12->tag != Iex_Unop || a12->Iex.Unop.op != Iop_Not64)
+               goto nomatch;
+            // a12 is established
+            IRExpr* a111 = a11->Iex.Binop.arg1;
+            IRExpr* a112 = chase(env, a11->Iex.Binop.arg2);
+            IRExpr* a121 = a12->Iex.Unop.arg;
+            if (!a111 || !a112 || !a121)
+               goto nomatch;
+            // a111 and a121 need to be the same temp.
+            if (!eqIRAtom(a111, a121))
+               goto nomatch;
+            // Finally, a112 must be a 64-bit version of -1.
+            if (!isOnesU(a112))
+               goto nomatch;
+            // Match established.  Transform.
+            e2 = IRExpr_Unop(Iop_CtzNat64, a111);
+            break;
+           nomatch:
+            break;
+         }
+         default:
+            break;
+         } // switch (e->Iex.Unop.op)
+
+      } // if (e->Iex.Unop.arg->tag == Iex_Const)
       break;
 
    case Iex_Binop:
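The identity behind this fold checks out in plain C: popcount((x-1) & ~x) counts the trailing zeroes of x, and for x == 0 yields 64, which is exactly CtzNat64 semantics. A sketch assuming a GCC/Clang builtin (illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* CtzNat64 synthesised the way gcc's inlined strlen idiom produces it. */
    static uint64_t ctz_nat64_via_popcount(uint64_t x) {
        return (uint64_t)__builtin_popcountll((x - 1) & ~x);
    }

    int main(void) {
        assert(ctz_nat64_via_popcount(0x1000u) == 12);
        assert(ctz_nat64_via_popcount(0u)      == 64);  /* CtzNat64(0) == 64 */
        return 0;
    }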
valgrind.spec
@@ -3,7 +3,7 @@
 Summary: Tool for finding memory management bugs in programs
 Name: %{?scl_prefix}valgrind
 Version: 3.14.0
-Release: 3%{?dist}
+Release: 4%{?dist}
 Epoch: 1
 License: GPLv2+
 URL: http://www.valgrind.org/
@@ -119,6 +119,15 @@ Patch8: valgrind-3.14.0-s390x-vec-float-point-tests.patch
 # KDE#401277 More bugs in z13 support
 Patch9: valgrind-3.14.0-s390z-more-z13-fixes.patch
 
+# KDE#386945 Bogus memcheck errors on ppc64(le) when using strcmp
+Patch10: valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch
+Patch11: valgrind-3.14.0-new-strlen-IROps.patch
+Patch12: valgrind-3.14.0-ppc-instr-new-IROps.patch
+Patch13: valgrind-3.14.0-memcheck-new-IROps.patch
+Patch14: valgrind-3.14.0-ppc-frontend-new-IROps.patch
+Patch15: valgrind-3.14.0-transform-popcount64-ctznat64.patch
+Patch16: valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch
+
 %if %{build_multilib}
 # Ensure glibc{,-devel} is installed for both multilib arches
 BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so
@@ -260,6 +269,13 @@ Valgrind User Manual for details.
 %patch7 -p1
 %patch8 -p1
 %patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%patch16 -p1
 
 %build
 CC=gcc
@@ -494,6 +510,15 @@ fi
 %endif
 
 %changelog
+* Fri Nov 23 2018 Mark Wielaard <mjw@fedoraproject.org> - 3.14.0-4
+- Add valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch,
+  valgrind-3.14.0-new-strlen-IROps.patch,
+  valgrind-3.14.0-ppc-instr-new-IROps.patch,
+  valgrind-3.14.0-memcheck-new-IROps.patch,
+  valgrind-3.14.0-ppc-frontend-new-IROps.patch,
+  valgrind-3.14.0-transform-popcount64-ctznat64.patch and
+  valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch (#1652926)
+
 * Wed Nov 21 2018 Mark Wielaard <mjw@fedoraproject.org> - 3.14.0-3
 - Add valgrind-3.14.0-s390z-more-z13-fixes.patch.