valgrind/valgrind-3.14.0-ppc-frontend-new-IROps.patch
Mark Wielaard b3eda9b80b 3.14.0-4 gcc ppc64le inlined memcmp vs memcheck (#1652926)
- Add valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch,
  valgrind-3.14.0-new-strlen-IROps.patch,
  valgrind-3.14.0-ppc-instr-new-IROps.patch,
  valgrind-3.14.0-memcheck-new-IROps.patch,
  valgrind-3.14.0-ppc-frontend-new-IROps.patch,
  valgrind-3.14.0-transform-popcount64-ctznat64.patch and
  valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch (#1652926)
2018-11-23 22:31:07 +01:00

382 lines
14 KiB
Diff

commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:36:53 2018 +0100
ppc front end: use new IROps added in 42719898.
This pertains to bug 386945.
VEX/priv/guest_ppc_toIR.c:
gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
gen_vpopcntd_mode32: use Iop_PopCount32.
for cntlz{w,d}, use Iop_CtzNat{32,64}.
gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
verbose_Clz32: remove (was unused anyway).
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index cb1cae1..8977d4f 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -1595,7 +1595,8 @@ typedef enum {
/* Generate an IR sequence to do a popcount operation on the supplied
IRTemp, and return a new IRTemp holding the result. 'ty' may be
Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
+ _popcount_data_type data_type )
{
/* Do count across 2^data_type bits,
byte: data_type = 3
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
vassert(ty == Ity_I64 || ty == Ity_I32);
+ // Use a single IROp in cases where we can.
+
+ if (ty == Ity_I64 && data_type == DWORD) {
+ IRTemp res = newTemp(Ity_I64);
+ assign(res, unop(Iop_PopCount64, mkexpr(src)));
+ return res;
+ }
+
+ if (ty == Ity_I32 && data_type == WORD) {
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, unop(Iop_PopCount32, mkexpr(src)));
+ return res;
+ }
+
+ // For the rest, we have to do it the slow way.
+
if (ty == Ity_I32) {
for (idx = 0; idx < WORD; idx++) {
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
return nyu;
}
-// else, ty == Ity_I64
+ // else, ty == Ity_I64
vassert(mode64);
for (i = 0; i < DWORD; i++) {
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
*/
static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
{
- Int i, shift[6];
- IRTemp mask[6];
- IRTemp old = IRTemp_INVALID;
- IRTemp nyu1 = IRTemp_INVALID;
- IRTemp nyu2 = IRTemp_INVALID;
IRTemp retval = newTemp(Ity_I64);
vassert(!mode64);
- for (i = 0; i < WORD; i++) {
- mask[i] = newTemp(Ity_I32);
- shift[i] = 1 << i;
- }
- assign(mask[0], mkU32(0x55555555));
- assign(mask[1], mkU32(0x33333333));
- assign(mask[2], mkU32(0x0F0F0F0F));
- assign(mask[3], mkU32(0x00FF00FF));
- assign(mask[4], mkU32(0x0000FFFF));
- old = src1;
- for (i = 0; i < WORD; i++) {
- nyu1 = newTemp(Ity_I32);
- assign(nyu1,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu1;
- }
-
- old = src2;
- for (i = 0; i < WORD; i++) {
- nyu2 = newTemp(Ity_I32);
- assign(nyu2,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu2;
- }
- assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+ assign(retval,
+ unop(Iop_32Uto64,
+ binop(Iop_Add32,
+ unop(Iop_PopCount32, mkexpr(src1)),
+ unop(Iop_PopCount32, mkexpr(src2)))));
return retval;
}
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( result, unop( Iop_Ctz32,
+ assign( result, unop( Iop_CtzNat32,
unop( Iop_64to32, mkexpr( rS ) ) ) );
assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
+ assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
if ( flag_rC == 1 )
set_CR0( mkexpr( rA ) );
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
IRTemp rS = newTemp(ty);
IRTemp rA = newTemp(ty);
IRTemp rB = newTemp(ty);
- IRExpr* irx;
Bool do_rc = False;
assign( rS, getIReg(rS_addr) );
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
break;
case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
- IRExpr* lo32;
if (rB_addr!=0) {
vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
return False;
}
- DIP("cntlzw%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
+ DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
// mode64: count in low word only
- lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
-
- // Iop_Clz32 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE32, lo32, mkU32(0));
- assign(rA, mkWidenFrom32(ty,
- IRExpr_ITE( irx,
- unop(Iop_Clz32, lo32),
- mkU32(32)),
- False));
-
- // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+ IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+ IRExpr* res32 = unop(Iop_ClzNat32, lo32);
+ assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
break;
}
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
return False;
}
- DIP("cntlzd%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
- // Iop_Clz64 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
- assign(rA, IRExpr_ITE( irx,
- unop(Iop_Clz64, mkexpr(rS)),
- mkU64(64) ));
- // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+ DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+ assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
break;
case 0x1FC: // cmpb (Power6: compare bytes)
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
putFReg( rS_addr, mkexpr(frA));
return True;
}
- case 0x1FA: // popcntd (population count doubleword
+ case 0x1FA: // popcntd (population count doubleword)
{
+ vassert(mode64);
DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
putIReg( rA_addr, mkexpr(result) );
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
{
vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
- return
- binop(Iop_Or32,
- binop(Iop_Shl32, mkexpr(t), mkU8(24)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
- mkU32(0x00FF0000)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
- mkU32(0x0000FF00)),
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
- mkU32(0x000000FF) )
- )));
+ return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
}
/* Generates code to swap the byte order in the lower half of an Ity_I32,
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
+ // JRS FIXME:
+ // * is the host_endness conditional below actually necessary?
+ // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
+ // That would be a lot more efficient.
IRExpr * nextAddr;
IRTemp w3 = newTemp( Ity_I32 );
IRTemp w4 = newTemp( Ity_I32 );
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
case 0x7C3: // vpopcntd
{
if (mode64) {
- /* Break vector into 64-bit double words and do the population count
- * on each double word.
+ /* Break vector into 64-bit double words and do the population
+ count on each double word.
*/
IRType ty = Ity_I64;
IRTemp bits0_63 = newTemp(Ity_I64);
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
mkexpr( cnt_bits0_63 ) ) );
} else {
/* Break vector into 32-bit words and do the population count
- * on each doubleword.
+ on each 32-bit word.
*/
IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
- IRTemp cnt_bits0_63 = newTemp(Ity_I64);
+ IRTemp cnt_bits0_63 = newTemp(Ity_I64);
IRTemp cnt_bits64_127 = newTemp(Ity_I64);
DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
- breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+ breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
+ &bits32_63, &bits0_31 );
cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63);
cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
/* Miscellaneous ISA 2.06 instructions */
case 0x1FA: // popcntd
+ if (!mode64) goto decode_failure;
+ /* else fallthru */
case 0x17A: // popcntw
case 0x7A: // popcntb
- if (dis_int_logic( theInstr )) goto decode_success;
- goto decode_failure;
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
case 0x0FC: // bpermd
if (!mode64) goto decode_failure;
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
return dres;
}
-
-/*------------------------------------------------------------*/
-/*--- Unused stuff ---*/
-/*------------------------------------------------------------*/
-
-///* A potentially more memcheck-friendly implementation of Clz32, with
-// the boundary case Clz32(0) = 32, which is what ppc requires. */
-//
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
-//{
-// /* Welcome ... to SSA R Us. */
-// IRTemp n1 = newTemp(Ity_I32);
-// IRTemp n2 = newTemp(Ity_I32);
-// IRTemp n3 = newTemp(Ity_I32);
-// IRTemp n4 = newTemp(Ity_I32);
-// IRTemp n5 = newTemp(Ity_I32);
-// IRTemp n6 = newTemp(Ity_I32);
-// IRTemp n7 = newTemp(Ity_I32);
-// IRTemp n8 = newTemp(Ity_I32);
-// IRTemp n9 = newTemp(Ity_I32);
-// IRTemp n10 = newTemp(Ity_I32);
-// IRTemp n11 = newTemp(Ity_I32);
-// IRTemp n12 = newTemp(Ity_I32);
-//
-// /* First, propagate the most significant 1-bit into all lower
-// positions in the word. */
-// /* unsigned int clz ( unsigned int n )
-// {
-// n |= (n >> 1);
-// n |= (n >> 2);
-// n |= (n >> 4);
-// n |= (n >> 8);
-// n |= (n >> 16);
-// return bitcount(~n);
-// }
-// */
-// assign(n1, mkexpr(arg));
-// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
-// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
-// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
-// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
-// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
-// /* This gives a word of the form 0---01---1. Now invert it, giving
-// a word of the form 1---10---0, then do a population-count idiom
-// (to count the 1s, which is the number of leading zeroes, or 32
-// if the original word was 0. */
-// assign(n7, unop(Iop_Not32, mkexpr(n6)));
-//
-// /* unsigned int bitcount ( unsigned int n )
-// {
-// n = n - ((n >> 1) & 0x55555555);
-// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
-// n = (n + (n >> 4)) & 0x0F0F0F0F;
-// n = n + (n >> 8);
-// n = (n + (n >> 16)) & 0x3F;
-// return n;
-// }
-// */
-// assign(n8,
-// binop(Iop_Sub32,
-// mkexpr(n7),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
-// mkU32(0x55555555))));
-// assign(n9,
-// binop(Iop_Add32,
-// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
-// mkU32(0x33333333))));
-// assign(n10,
-// binop(Iop_And32,
-// binop(Iop_Add32,
-// mkexpr(n9),
-// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
-// mkU32(0x0F0F0F0F)));
-// assign(n11,
-// binop(Iop_Add32,
-// mkexpr(n10),
-// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
-// assign(n12,
-// binop(Iop_Add32,
-// mkexpr(n11),
-// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
-// return
-// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
-//}
-
/*--------------------------------------------------------------------*/
/*--- end guest_ppc_toIR.c ---*/
/*--------------------------------------------------------------------*/