b3eda9b80b
- Add valgrind-3.14.0-get_otrack_shadow_offset_wrk-ppc.patch, valgrind-3.14.0-new-strlen-IROps.patch, valgrind-3.14.0-ppc-instr-new-IROps.patch, valgrind-3.14.0-memcheck-new-IROps.patch, valgrind-3.14.0-ppc-frontend-new-IROps.patch, valgrind-3.14.0-transform-popcount64-ctznat64.patch and valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch (#1652926)
382 lines
14 KiB
Diff
382 lines
14 KiB
Diff
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
|
|
Author: Julian Seward <jseward@acm.org>
|
|
Date: Tue Nov 20 11:36:53 2018 +0100
|
|
|
|
ppc front end: use new IROps added in 42719898.
|
|
|
|
This pertains to bug 386945.
|
|
|
|
VEX/priv/guest_ppc_toIR.c:
|
|
|
|
gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
|
|
|
|
gen_vpopcntd_mode32: use Iop_PopCount32.
|
|
|
|
for cntlz{w,d}, use Iop_ClzNat{32,64}; in dis_modulo_int, replace Iop_Ctz{32,64} with Iop_CtzNat{32,64}.
|
|
|
|
gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
|
|
|
|
verbose_Clz32: remove (was unused anyway).
|
|
|
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
|
index cb1cae1..8977d4f 100644
|
|
--- a/VEX/priv/guest_ppc_toIR.c
|
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
|
@@ -1595,7 +1595,8 @@ typedef enum {
|
|
/* Generate an IR sequence to do a popcount operation on the supplied
|
|
IRTemp, and return a new IRTemp holding the result. 'ty' may be
|
|
Ity_I32 or Ity_I64 only. */
|
|
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
|
|
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
|
|
+ _popcount_data_type data_type )
|
|
{
|
|
/* Do count across 2^data_type bits,
|
|
byte: data_type = 3
|
|
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
|
|
|
vassert(ty == Ity_I64 || ty == Ity_I32);
|
|
|
|
+ // Use a single IROp in cases where we can.
|
|
+
|
|
+ if (ty == Ity_I64 && data_type == DWORD) {
|
|
+ IRTemp res = newTemp(Ity_I64);
|
|
+ assign(res, unop(Iop_PopCount64, mkexpr(src)));
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ if (ty == Ity_I32 && data_type == WORD) {
|
|
+ IRTemp res = newTemp(Ity_I32);
|
|
+ assign(res, unop(Iop_PopCount32, mkexpr(src)));
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ // For the rest, we have to do it the slow way.
|
|
+
|
|
if (ty == Ity_I32) {
|
|
|
|
for (idx = 0; idx < WORD; idx++) {
|
|
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
|
return nyu;
|
|
}
|
|
|
|
-// else, ty == Ity_I64
|
|
+ // else, ty == Ity_I64
|
|
vassert(mode64);
|
|
|
|
for (i = 0; i < DWORD; i++) {
|
|
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
|
*/
|
|
static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
|
|
{
|
|
- Int i, shift[6];
|
|
- IRTemp mask[6];
|
|
- IRTemp old = IRTemp_INVALID;
|
|
- IRTemp nyu1 = IRTemp_INVALID;
|
|
- IRTemp nyu2 = IRTemp_INVALID;
|
|
IRTemp retval = newTemp(Ity_I64);
|
|
|
|
vassert(!mode64);
|
|
|
|
- for (i = 0; i < WORD; i++) {
|
|
- mask[i] = newTemp(Ity_I32);
|
|
- shift[i] = 1 << i;
|
|
- }
|
|
- assign(mask[0], mkU32(0x55555555));
|
|
- assign(mask[1], mkU32(0x33333333));
|
|
- assign(mask[2], mkU32(0x0F0F0F0F));
|
|
- assign(mask[3], mkU32(0x00FF00FF));
|
|
- assign(mask[4], mkU32(0x0000FFFF));
|
|
- old = src1;
|
|
- for (i = 0; i < WORD; i++) {
|
|
- nyu1 = newTemp(Ity_I32);
|
|
- assign(nyu1,
|
|
- binop(Iop_Add32,
|
|
- binop(Iop_And32,
|
|
- mkexpr(old),
|
|
- mkexpr(mask[i])),
|
|
- binop(Iop_And32,
|
|
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
|
|
- mkexpr(mask[i]))));
|
|
- old = nyu1;
|
|
- }
|
|
-
|
|
- old = src2;
|
|
- for (i = 0; i < WORD; i++) {
|
|
- nyu2 = newTemp(Ity_I32);
|
|
- assign(nyu2,
|
|
- binop(Iop_Add32,
|
|
- binop(Iop_And32,
|
|
- mkexpr(old),
|
|
- mkexpr(mask[i])),
|
|
- binop(Iop_And32,
|
|
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
|
|
- mkexpr(mask[i]))));
|
|
- old = nyu2;
|
|
- }
|
|
- assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
|
|
+ assign(retval,
|
|
+ unop(Iop_32Uto64,
|
|
+ binop(Iop_Add32,
|
|
+ unop(Iop_PopCount32, mkexpr(src1)),
|
|
+ unop(Iop_PopCount32, mkexpr(src2)))));
|
|
return retval;
|
|
}
|
|
|
|
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
|
|
rA_address, rS_address);
|
|
|
|
assign( rS, getIReg( rS_address ) );
|
|
- assign( result, unop( Iop_Ctz32,
|
|
+ assign( result, unop( Iop_CtzNat32,
|
|
unop( Iop_64to32, mkexpr( rS ) ) ) );
|
|
assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
|
|
|
|
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
|
|
rA_address, rS_address);
|
|
|
|
assign( rS, getIReg( rS_address ) );
|
|
- assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
|
|
+ assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
|
|
|
|
if ( flag_rC == 1 )
|
|
set_CR0( mkexpr( rA ) );
|
|
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
|
|
IRTemp rS = newTemp(ty);
|
|
IRTemp rA = newTemp(ty);
|
|
IRTemp rB = newTemp(ty);
|
|
- IRExpr* irx;
|
|
Bool do_rc = False;
|
|
|
|
assign( rS, getIReg(rS_addr) );
|
|
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
|
|
break;
|
|
|
|
case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
|
|
- IRExpr* lo32;
|
|
if (rB_addr!=0) {
|
|
vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
|
|
return False;
|
|
}
|
|
- DIP("cntlzw%s r%u,r%u\n",
|
|
- flag_rC ? ".":"", rA_addr, rS_addr);
|
|
+ DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
|
|
|
|
// mode64: count in low word only
|
|
- lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
|
|
-
|
|
- // Iop_Clz32 undefined for arg==0, so deal with that case:
|
|
- irx = binop(Iop_CmpNE32, lo32, mkU32(0));
|
|
- assign(rA, mkWidenFrom32(ty,
|
|
- IRExpr_ITE( irx,
|
|
- unop(Iop_Clz32, lo32),
|
|
- mkU32(32)),
|
|
- False));
|
|
-
|
|
- // TODO: alternatively: assign(rA, verbose_Clz32(rS));
|
|
+ IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
|
|
+ IRExpr* res32 = unop(Iop_ClzNat32, lo32);
|
|
+ assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
|
|
break;
|
|
}
|
|
|
|
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
|
|
vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
|
|
return False;
|
|
}
|
|
- DIP("cntlzd%s r%u,r%u\n",
|
|
- flag_rC ? ".":"", rA_addr, rS_addr);
|
|
- // Iop_Clz64 undefined for arg==0, so deal with that case:
|
|
- irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
|
|
- assign(rA, IRExpr_ITE( irx,
|
|
- unop(Iop_Clz64, mkexpr(rS)),
|
|
- mkU64(64) ));
|
|
- // TODO: alternatively: assign(rA, verbose_Clz64(rS));
|
|
+ DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
|
|
+ assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
|
|
break;
|
|
|
|
case 0x1FC: // cmpb (Power6: compare bytes)
|
|
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
|
|
putFReg( rS_addr, mkexpr(frA));
|
|
return True;
|
|
}
|
|
- case 0x1FA: // popcntd (population count doubleword
|
|
+ case 0x1FA: // popcntd (population count doubleword)
|
|
{
|
|
+ vassert(mode64);
|
|
DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
|
|
IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
|
|
putIReg( rA_addr, mkexpr(result) );
|
|
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
|
|
static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
|
|
{
|
|
vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
|
|
- return
|
|
- binop(Iop_Or32,
|
|
- binop(Iop_Shl32, mkexpr(t), mkU8(24)),
|
|
- binop(Iop_Or32,
|
|
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
|
|
- mkU32(0x00FF0000)),
|
|
- binop(Iop_Or32,
|
|
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
|
|
- mkU32(0x0000FF00)),
|
|
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
|
|
- mkU32(0x000000FF) )
|
|
- )));
|
|
+ return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
|
|
}
|
|
|
|
/* Generates code to swap the byte order in the lower half of an Ity_I32,
|
|
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
|
|
|
|
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
|
|
{
|
|
+ // JRS FIXME:
|
|
+ // * is the host_endness conditional below actually necessary?
|
|
+ // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
|
|
+ // That would be a lot more efficient.
|
|
IRExpr * nextAddr;
|
|
IRTemp w3 = newTemp( Ity_I32 );
|
|
IRTemp w4 = newTemp( Ity_I32 );
|
|
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
|
|
case 0x7C3: // vpopcntd
|
|
{
|
|
if (mode64) {
|
|
- /* Break vector into 64-bit double words and do the population count
|
|
- * on each double word.
|
|
+ /* Break vector into 64-bit double words and do the population
|
|
+ count on each double word.
|
|
*/
|
|
IRType ty = Ity_I64;
|
|
IRTemp bits0_63 = newTemp(Ity_I64);
|
|
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
|
|
mkexpr( cnt_bits0_63 ) ) );
|
|
} else {
|
|
/* Break vector into 32-bit words and do the population count
|
|
- * on each doubleword.
|
|
+ on each 32-bit word.
|
|
*/
|
|
IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
|
|
bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
|
|
- IRTemp cnt_bits0_63 = newTemp(Ity_I64);
|
|
+ IRTemp cnt_bits0_63 = newTemp(Ity_I64);
|
|
IRTemp cnt_bits64_127 = newTemp(Ity_I64);
|
|
|
|
DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
|
|
- breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
|
|
+ breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
|
|
+ &bits32_63, &bits0_31 );
|
|
|
|
cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63);
|
|
cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
|
|
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
|
|
|
|
/* Miscellaneous ISA 2.06 instructions */
|
|
case 0x1FA: // popcntd
|
|
+ if (!mode64) goto decode_failure;
|
|
+ /* else fallthru */
|
|
case 0x17A: // popcntw
|
|
case 0x7A: // popcntb
|
|
- if (dis_int_logic( theInstr )) goto decode_success;
|
|
- goto decode_failure;
|
|
+ if (dis_int_logic( theInstr )) goto decode_success;
|
|
+ goto decode_failure;
|
|
|
|
case 0x0FC: // bpermd
|
|
if (!mode64) goto decode_failure;
|
|
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
|
|
return dres;
|
|
}
|
|
|
|
-
|
|
-/*------------------------------------------------------------*/
|
|
-/*--- Unused stuff ---*/
|
|
-/*------------------------------------------------------------*/
|
|
-
|
|
-///* A potentially more memcheck-friendly implementation of Clz32, with
|
|
-// the boundary case Clz32(0) = 32, which is what ppc requires. */
|
|
-//
|
|
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
|
|
-//{
|
|
-// /* Welcome ... to SSA R Us. */
|
|
-// IRTemp n1 = newTemp(Ity_I32);
|
|
-// IRTemp n2 = newTemp(Ity_I32);
|
|
-// IRTemp n3 = newTemp(Ity_I32);
|
|
-// IRTemp n4 = newTemp(Ity_I32);
|
|
-// IRTemp n5 = newTemp(Ity_I32);
|
|
-// IRTemp n6 = newTemp(Ity_I32);
|
|
-// IRTemp n7 = newTemp(Ity_I32);
|
|
-// IRTemp n8 = newTemp(Ity_I32);
|
|
-// IRTemp n9 = newTemp(Ity_I32);
|
|
-// IRTemp n10 = newTemp(Ity_I32);
|
|
-// IRTemp n11 = newTemp(Ity_I32);
|
|
-// IRTemp n12 = newTemp(Ity_I32);
|
|
-//
|
|
-// /* First, propagate the most significant 1-bit into all lower
|
|
-// positions in the word. */
|
|
-// /* unsigned int clz ( unsigned int n )
|
|
-// {
|
|
-// n |= (n >> 1);
|
|
-// n |= (n >> 2);
|
|
-// n |= (n >> 4);
|
|
-// n |= (n >> 8);
|
|
-// n |= (n >> 16);
|
|
-// return bitcount(~n);
|
|
-// }
|
|
-// */
|
|
-// assign(n1, mkexpr(arg));
|
|
-// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
|
|
-// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
|
|
-// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
|
|
-// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
|
|
-// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
|
|
-// /* This gives a word of the form 0---01---1. Now invert it, giving
|
|
-// a word of the form 1---10---0, then do a population-count idiom
|
|
-// (to count the 1s, which is the number of leading zeroes, or 32
|
|
-// if the original word was 0. */
|
|
-// assign(n7, unop(Iop_Not32, mkexpr(n6)));
|
|
-//
|
|
-// /* unsigned int bitcount ( unsigned int n )
|
|
-// {
|
|
-// n = n - ((n >> 1) & 0x55555555);
|
|
-// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
|
|
-// n = (n + (n >> 4)) & 0x0F0F0F0F;
|
|
-// n = n + (n >> 8);
|
|
-// n = (n + (n >> 16)) & 0x3F;
|
|
-// return n;
|
|
-// }
|
|
-// */
|
|
-// assign(n8,
|
|
-// binop(Iop_Sub32,
|
|
-// mkexpr(n7),
|
|
-// binop(Iop_And32,
|
|
-// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
|
|
-// mkU32(0x55555555))));
|
|
-// assign(n9,
|
|
-// binop(Iop_Add32,
|
|
-// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
|
|
-// binop(Iop_And32,
|
|
-// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
|
|
-// mkU32(0x33333333))));
|
|
-// assign(n10,
|
|
-// binop(Iop_And32,
|
|
-// binop(Iop_Add32,
|
|
-// mkexpr(n9),
|
|
-// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
|
|
-// mkU32(0x0F0F0F0F)));
|
|
-// assign(n11,
|
|
-// binop(Iop_Add32,
|
|
-// mkexpr(n10),
|
|
-// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
|
|
-// assign(n12,
|
|
-// binop(Iop_Add32,
|
|
-// mkexpr(n11),
|
|
-// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
|
|
-// return
|
|
-// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
|
|
-//}
|
|
-
|
|
/*--------------------------------------------------------------------*/
|
|
/*--- end guest_ppc_toIR.c ---*/
|
|
/*--------------------------------------------------------------------*/
|