Add valgrind-3.14.0-set_AV_CR6.patch
This commit is contained in:
parent
22343e31a3
commit
1b2d1f45f3
145
valgrind-3.14.0-set_AV_CR6.patch
Normal file
145
valgrind-3.14.0-set_AV_CR6.patch
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
commit dc1523fb3550b4ed9dd4c178741626daaa474da7
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Mon Dec 10 17:18:20 2018 +0100
|
||||||
|
|
||||||
|
PR386945 set_AV_CR6 patch
|
||||||
|
|
||||||
|
https://bugs.kde.org/show_bug.cgi?id=386945#c62
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index ec2f90a..c3cc6d0 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -2062,45 +2062,88 @@ static void set_CR0 ( IRExpr* result )
|
||||||
|
static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
|
||||||
|
{
|
||||||
|
/* CR6[0:3] = {all_ones, 0, all_zeros, 0}
|
||||||
|
- all_ones = (v[0] && v[1] && v[2] && v[3])
|
||||||
|
- all_zeros = ~(v[0] || v[1] || v[2] || v[3])
|
||||||
|
+ 32 bit: all_zeros = (v[0] || v[1] || v[2] || v[3]) == 0x0000'0000
|
||||||
|
+ all_ones = ~(v[0] && v[1] && v[2] && v[3]) == 0x0000'0000
|
||||||
|
+ where v[] denotes 32-bit lanes
|
||||||
|
+ or
|
||||||
|
+ 64 bit: all_zeros = (v[0] || v[1]) == 0x0000'0000'0000'0000
|
||||||
|
+ all_ones = ~(v[0] && v[1]) == 0x0000'0000'0000'0000
|
||||||
|
+ where v[] denotes 64-bit lanes
|
||||||
|
+
|
||||||
|
+ The 32- and 64-bit versions compute the same thing, but the 64-bit one
|
||||||
|
+ tries to be a bit more efficient.
|
||||||
|
*/
|
||||||
|
- IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v2 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v3 = newTemp(Ity_V128);
|
||||||
|
- IRTemp rOnes = newTemp(Ity_I8);
|
||||||
|
- IRTemp rZeros = newTemp(Ity_I8);
|
||||||
|
-
|
||||||
|
vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
|
||||||
|
|
||||||
|
- assign( v0, result );
|
||||||
|
- assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
|
||||||
|
- assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
- assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
|
||||||
|
+ IRTemp overlappedOred = newTemp(Ity_V128);
|
||||||
|
+ IRTemp overlappedAnded = newTemp(Ity_V128);
|
||||||
|
+
|
||||||
|
+ if (mode64) {
|
||||||
|
+ IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
+ assign( v0, result );
|
||||||
|
+ assign( v1, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
+ assign(overlappedOred,
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)));
|
||||||
|
+ assign(overlappedAnded,
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)));
|
||||||
|
+ } else {
|
||||||
|
+ IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v2 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v3 = newTemp(Ity_V128);
|
||||||
|
+ assign( v0, result );
|
||||||
|
+ assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
|
||||||
|
+ assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
+ assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
|
||||||
|
+ assign(overlappedOred,
|
||||||
|
+ binop(Iop_OrV128,
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))));
|
||||||
|
+ assign(overlappedAnded,
|
||||||
|
+ binop(Iop_AndV128,
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ IRTemp rOnes = newTemp(Ity_I8);
|
||||||
|
+ IRTemp rZeroes = newTemp(Ity_I8);
|
||||||
|
|
||||||
|
- assign( rZeros, unop(Iop_1Uto8,
|
||||||
|
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
|
||||||
|
- unop(Iop_Not32,
|
||||||
|
- unop(Iop_V128to32,
|
||||||
|
- binop(Iop_OrV128,
|
||||||
|
- binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
- binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
|
||||||
|
- ))) );
|
||||||
|
+ if (mode64) {
|
||||||
|
+ assign(rZeroes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ64,
|
||||||
|
+ mkU64(0),
|
||||||
|
+ unop(Iop_V128to64, mkexpr(overlappedOred)))));
|
||||||
|
+ assign(rOnes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ64,
|
||||||
|
+ mkU64(0),
|
||||||
|
+ unop(Iop_Not64,
|
||||||
|
+ unop(Iop_V128to64, mkexpr(overlappedAnded))))));
|
||||||
|
+ } else {
|
||||||
|
+ assign(rZeroes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ32,
|
||||||
|
+ mkU32(0),
|
||||||
|
+ unop(Iop_V128to32, mkexpr(overlappedOred)))));
|
||||||
|
+ assign(rOnes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ32,
|
||||||
|
+ mkU32(0),
|
||||||
|
+ unop(Iop_Not32,
|
||||||
|
+ unop(Iop_V128to32, mkexpr(overlappedAnded))))));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // rOnes might not be used below. But iropt will remove it, so there's no
|
||||||
|
+ // inefficiency as a result.
|
||||||
|
|
||||||
|
if (test_all_ones) {
|
||||||
|
- assign( rOnes, unop(Iop_1Uto8,
|
||||||
|
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
|
||||||
|
- unop(Iop_V128to32,
|
||||||
|
- binop(Iop_AndV128,
|
||||||
|
- binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
- binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
|
||||||
|
- ))) );
|
||||||
|
putCR321( 6, binop(Iop_Or8,
|
||||||
|
binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
|
||||||
|
- binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
|
||||||
|
+ binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1))) );
|
||||||
|
} else {
|
||||||
|
- putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
|
||||||
|
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1)) );
|
||||||
|
}
|
||||||
|
putCR0( 6, mkU8(0) );
|
||||||
|
}
|
||||||
|
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
|
||||||
|
index c24db91..7f69ee3 100644
|
||||||
|
--- a/memcheck/mc_translate.c
|
||||||
|
+++ b/memcheck/mc_translate.c
|
||||||
|
@@ -8322,6 +8322,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure,
|
||||||
|
# elif defined(VGA_amd64)
|
||||||
|
mce.dlbo.dl_Add64 = DLauto;
|
||||||
|
mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
|
||||||
|
+# elif defined(VGA_ppc64le)
|
||||||
|
+ // Needed by (at least) set_AV_CR6() in the front end.
|
||||||
|
+ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
|
@@ -142,6 +142,7 @@ Patch20: valgrind-3.14.0-ppc64-unaligned-words.patch
|
|||||||
Patch21: valgrind-3.14.0-ppc64-lxvd2x.patch
|
Patch21: valgrind-3.14.0-ppc64-lxvd2x.patch
|
||||||
Patch22: valgrind-3.14.0-ppc64-unaligned-vecs.patch
|
Patch22: valgrind-3.14.0-ppc64-unaligned-vecs.patch
|
||||||
Patch23: valgrind-3.14.0-ppc64-lxvb16x.patch
|
Patch23: valgrind-3.14.0-ppc64-lxvb16x.patch
|
||||||
|
Patch24: valgrind-3.14.0-set_AV_CR6.patch
|
||||||
|
|
||||||
%if %{build_multilib}
|
%if %{build_multilib}
|
||||||
# Ensure glibc{,-devel} is installed for both multilib arches
|
# Ensure glibc{,-devel} is installed for both multilib arches
|
||||||
@@ -298,6 +299,7 @@ Valgrind User Manual for details.
|
|||||||
%patch21 -p1
|
%patch21 -p1
|
||||||
%patch22 -p1
|
%patch22 -p1
|
||||||
%patch23 -p1
|
%patch23 -p1
|
||||||
|
%patch24 -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
CC=gcc
|
CC=gcc
|
||||||
@@ -539,6 +541,7 @@ fi
|
|||||||
- Add valgrind-3.14.0-ppc64-lxvd2x.patch
|
- Add valgrind-3.14.0-ppc64-lxvd2x.patch
|
||||||
- Add valgrind-3.14.0-ppc64-unaligned-vecs.patch
|
- Add valgrind-3.14.0-ppc64-unaligned-vecs.patch
|
||||||
- Add valgrind-3.14.0-ppc64-lxvb16x.patch
|
- Add valgrind-3.14.0-ppc64-lxvb16x.patch
|
||||||
|
- Add valgrind-3.14.0-set_AV_CR6.patch
|
||||||
|
|
||||||
* Sat Dec 1 2018 Mark Wielaard <mjw@fedoraproject.org> - 3.14.0.5
|
* Sat Dec 1 2018 Mark Wielaard <mjw@fedoraproject.org> - 3.14.0.5
|
||||||
- Add valgrind-3.14.0-wcsncmp.patch (#1645971)
|
- Add valgrind-3.14.0-wcsncmp.patch (#1645971)
|
||||||
|
Loading…
Reference in New Issue
Block a user