commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
Author: Mark Wielaard <mark@klomp.org>
Date:   Fri Dec 7 10:42:22 2018 -0500

    Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.

    This makes it possible for memcheck to analyse the new gcc strcmp
    inlined code correctly even if the ldbrx load is partly beyond an
    addressable block.

    Partially resolves bug 386945.

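In C terms, the change described above replaces the two 32-bit loads that previously implemented ldbrx with a single 64-bit load of the effective address followed by a full byte reversal. A minimal sketch of that semantics, for reference only (the helper name ldbrx_ref and the use of __builtin_bswap64 are illustrative, not part of the commit):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch of ldbrx rD,rA,rB semantics: load 8 bytes at
       the effective address and reverse their order.  The single wide
       load is the part that matters for memcheck, since the access is
       analysed as one unit rather than as two halves. */
    static uint64_t ldbrx_ref(const void *ea)
    {
        uint64_t dw1;
        memcpy(&dw1, ea, sizeof dw1);    /* the 64-bit load         */
        return __builtin_bswap64(dw1);   /* Iop_Reverse8sIn64_x1    */
    }
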
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8977d4f..a81dace 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
 
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
       {
-         // JRS FIXME:
-         // * is the host_endness conditional below actually necessary?
-         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
-         //   That would be a lot more efficient.
-         IRExpr * nextAddr;
-         IRTemp w3 = newTemp( Ity_I32 );
-         IRTemp w4 = newTemp( Ity_I32 );
-         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
-         assign( w2, gen_byterev32( w1 ) );
-         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
-         assign( w3, load( Ity_I32, nextAddr ) );
-         assign( w4, gen_byterev32( w3 ) );
-         if (host_endness == VexEndnessLE)
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+         /* Caller makes sure we are only called in mode64. */
+
+         /* If we supported swapping LE/BE loads in the backend then we could
+            just load the value with the bytes reversed by doing a BE load
+            on an LE machine and a LE load on a BE machine.
+
+         IRTemp dw1 = newTemp(Ity_I64);
+         if (host_endness == VexEndnessBE)
+            assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
          else
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+            assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
+         putIReg( rD_addr, mkexpr(dw1) );
+
+         But since we currently don't we load the value as is and then
+         switch it around with Iop_Reverse8sIn64_x1. */
+
+         IRTemp dw1 = newTemp(Ity_I64);
+         IRTemp dw2 = newTemp(Ity_I64);
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+         assign( dw1, load(Ity_I64, mkexpr(EA)) );
+         assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
+         putIReg( rD_addr, mkexpr(dw2) );
          break;
       }
 
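For comparison, the removed translation assembled the result from two byte-reversed 32-bit loads. On a little-endian host it behaved roughly like the following sketch (the helper name ldbrx_old_sketch is illustrative only, not part of the patch):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch of the old scheme on an LE host: 4-byte loads
       at EA and EA+4, each byte-reversed (gen_byterev32), then glued
       together with Iop_32HLto64 so the first word becomes the high half. */
    static uint64_t ldbrx_old_sketch(const unsigned char *ea)
    {
        uint32_t w1, w3;
        memcpy(&w1, ea, sizeof w1);              /* load at EA      */
        memcpy(&w3, ea + 4, sizeof w3);          /* load at EA + 4  */
        uint32_t w2 = __builtin_bswap32(w1);     /* gen_byterev32   */
        uint32_t w4 = __builtin_bswap32(w3);
        return ((uint64_t)w2 << 32) | w4;        /* Iop_32HLto64    */
    }

Splitting the access into two 4-byte loads is what prevented memcheck from analysing an ldbrx that extends partly beyond an addressable block; the single 64-bit load in the new code above avoids that split.
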
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 750cf8d..4fc3eb5 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
          return rr;
       }
 
+      case Iop_Reverse8sIn64_x1: {
+         /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
+            Can only be used in 64bit mode. */
+         vassert (mode64);
+
+         HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         HReg rr = newVRegI(env);
+         HReg rMask = newVRegI(env);
+         HReg rnMask = newVRegI(env);
+         HReg rtHi = newVRegI(env);
+         HReg rtLo = newVRegI(env);
+
+         // Copy r_src since we need to modify it
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
+
+         // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
+                                   True/* 64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+         // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
+                                   True/* !64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+         // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
+         /* We don't need to mask anymore, just two more shifts and an or. */
+         addInstr(env, mk_iMOVds_RR(rtLo, rr));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rr, rr,
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
+
+         return rr;
+      }
+
       case Iop_Left8:
       case Iop_Left16:
       case Iop_Left32:
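
The instruction selection above open-codes the 64-bit byte swap as three mask-and-shift rounds: adjacent bytes, then 16-bit halves, then the two 32-bit halves (the last round needs no masks). The same technique in portable C, as a sketch (bswap64_swar is an illustrative name, not a function from the patch):

    #include <stdint.h>

    /* Byte-reverse a 64-bit value with the same shift/mask rounds the
       backend emits for Iop_Reverse8sIn64_x1. */
    static uint64_t bswap64_swar(uint64_t r)
    {
        r = ((r & 0x00FF00FF00FF00FFULL) << 8)  | ((r & 0xFF00FF00FF00FF00ULL) >> 8);
        r = ((r & 0x0000FFFF0000FFFFULL) << 16) | ((r & 0xFFFF0000FFFF0000ULL) >> 16);
        r = (r << 32) | (r >> 32);   /* final round: no masks needed */
        return r;
    }

As a quick check, bswap64_swar(0x0102030405060708ULL) yields 0x0807060504030201ULL, a full byte reversal.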