import valgrind-3.14.0-10.el8_0
This commit is contained in:
commit
6ee00ced0a
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
SOURCES/valgrind-3.14.0.tar.bz2
|
1
.valgrind.metadata
Normal file
1
.valgrind.metadata
Normal file
@ -0,0 +1 @@
|
|||||||
|
182afd405b92ddb6f52c6729e848eacf4b1daf46 SOURCES/valgrind-3.14.0.tar.bz2
|
24
SOURCES/valgrind-3.14.0-arm64-ptrace-traceme.patch
Normal file
24
SOURCES/valgrind-3.14.0-arm64-ptrace-traceme.patch
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
commit 43fe4bc236d667257eeebfb4f6bcbe2b92aea455
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Fri Dec 14 14:32:27 2018 +0100
|
||||||
|
|
||||||
|
arm64: Fix PTRACE_TRACEME memcheck/tests/linux/getregset.vgtest testcase.
|
||||||
|
|
||||||
|
The sys_ptrace post didn't mark the thread as being in traceme mode.
|
||||||
|
This occasionally would make the memcheck/tests/linux/getregset.vgtest
|
||||||
|
testcase fail. With this patch it reliably passes.
|
||||||
|
|
||||||
|
diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c
|
||||||
|
index 9ef54b4..650f5b9 100644
|
||||||
|
--- a/coregrind/m_syswrap/syswrap-arm64-linux.c
|
||||||
|
+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
|
||||||
|
@@ -499,6 +499,9 @@ PRE(sys_ptrace)
|
||||||
|
POST(sys_ptrace)
|
||||||
|
{
|
||||||
|
switch (ARG1) {
|
||||||
|
+ case VKI_PTRACE_TRACEME:
|
||||||
|
+ ML_(linux_POST_traceme)(tid);
|
||||||
|
+ break;
|
||||||
|
case VKI_PTRACE_PEEKTEXT:
|
||||||
|
case VKI_PTRACE_PEEKDATA:
|
||||||
|
case VKI_PTRACE_PEEKUSR:
|
18
SOURCES/valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch
Normal file
18
SOURCES/valgrind-3.14.0-enable-ppc-Iop_Sar_Shr8.patch
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
commit 27fe22378da38424102c5292b782cacdd9d7b9e4
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 12:09:03 2018 +0100
|
||||||
|
|
||||||
|
Add support for Iop_{Sar,Shr}8 on ppc. --expensive-definedness-checks=yes needs them.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
|
||||||
|
index 5242176..750cf8d 100644
|
||||||
|
--- a/VEX/priv/host_ppc_isel.c
|
||||||
|
+++ b/VEX/priv/host_ppc_isel.c
|
||||||
|
@@ -1528,7 +1528,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
|
||||||
|
True/*32bit shift*/,
|
||||||
|
tmp, tmp, amt));
|
||||||
|
r_srcL = tmp;
|
||||||
|
- vassert(0); /* AWAITING TEST CASE */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Only 64 expressions need 64bit shifts,
|
59
SOURCES/valgrind-3.14.0-final_tidyup.patch
Normal file
59
SOURCES/valgrind-3.14.0-final_tidyup.patch
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
commit be7a73004583aab5d4c97cf55276ca58d5b3090b
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Wed Dec 12 14:15:28 2018 +0100
|
||||||
|
|
||||||
|
Mark helper regs defined in final_tidyup before freeres_wrapper call.
|
||||||
|
|
||||||
|
In final_tidyup we set up the guest to call the freeres_wrapper, which
|
||||||
|
will (possibly) call __gnu_cxx::__freeres() and/or __libc_freeres().
|
||||||
|
|
||||||
|
In a couple of cases (ppc64be, ppc64le and mips32) this involves setting
|
||||||
|
up one or more helper registers. Since we set up these guest registers
|
||||||
|
we should make sure to mark them as fully defined. Otherwise we might
|
||||||
|
see spurious warnings about undefined value usage if the guest register
|
||||||
|
happened to not be fully defined before.
|
||||||
|
|
||||||
|
This fixes PR402006.
|
||||||
|
|
||||||
|
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
|
||||||
|
index 00702fc..22872a2 100644
|
||||||
|
--- a/coregrind/m_main.c
|
||||||
|
+++ b/coregrind/m_main.c
|
||||||
|
@@ -2304,22 +2304,35 @@ static void final_tidyup(ThreadId tid)
|
||||||
|
"Caught __NR_exit; running %s wrapper\n", msgs[to_run - 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* set thread context to point to freeres_wrapper */
|
||||||
|
- /* ppc64be-linux note: freeres_wrapper gives us the real
|
||||||
|
+ /* Set thread context to point to freeres_wrapper.
|
||||||
|
+ ppc64be-linux note: freeres_wrapper gives us the real
|
||||||
|
function entry point, not a fn descriptor, so can use it
|
||||||
|
directly. However, we need to set R2 (the toc pointer)
|
||||||
|
appropriately. */
|
||||||
|
VG_(set_IP)(tid, freeres_wrapper);
|
||||||
|
+
|
||||||
|
# if defined(VGP_ppc64be_linux)
|
||||||
|
VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
|
||||||
|
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
|
||||||
|
+ offsetof(VexGuestPPC64State, guest_GPR2),
|
||||||
|
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
|
||||||
|
# elif defined(VGP_ppc64le_linux)
|
||||||
|
/* setting GPR2 but not really needed, GPR12 is needed */
|
||||||
|
VG_(threads)[tid].arch.vex.guest_GPR2 = freeres_wrapper;
|
||||||
|
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
|
||||||
|
+ offsetof(VexGuestPPC64State, guest_GPR2),
|
||||||
|
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
|
||||||
|
VG_(threads)[tid].arch.vex.guest_GPR12 = freeres_wrapper;
|
||||||
|
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
|
||||||
|
+ offsetof(VexGuestPPC64State, guest_GPR12),
|
||||||
|
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR12));
|
||||||
|
# endif
|
||||||
|
/* mips-linux note: we need to set t9 */
|
||||||
|
# if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
|
||||||
|
VG_(threads)[tid].arch.vex.guest_r25 = freeres_wrapper;
|
||||||
|
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
|
||||||
|
+ offsetof(VexGuestMIPS32State, guest_r25),
|
||||||
|
+ sizeof(VG_(threads)[tid].arch.vex.guest_r25));
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/* Pass a parameter to freeres_wrapper(). */
|
@ -0,0 +1,81 @@
|
|||||||
|
commit 7f1dd9d5aec1f1fd4eb0ae3a311358a914f1d73f
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 10:18:29 2018 +0100
|
||||||
|
|
||||||
|
get_otrack_shadow_offset_wrk for ppc32 and ppc64: add missing cases for XER_OV32, XER_CA32 and C_FPCC.
|
||||||
|
|
||||||
|
The missing cases were discovered whilst testing fixes for bug 386945, but are
|
||||||
|
otherwise unrelated to that bug.
|
||||||
|
|
||||||
|
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
|
||||||
|
index 5ed101f..4ce746e 100644
|
||||||
|
--- a/memcheck/mc_machine.c
|
||||||
|
+++ b/memcheck/mc_machine.c
|
||||||
|
@@ -120,11 +120,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
|
||||||
|
Int o = offset;
|
||||||
|
tl_assert(sz > 0);
|
||||||
|
|
||||||
|
-#if defined(VGA_ppc64be)
|
||||||
|
+# if defined(VGA_ppc64be)
|
||||||
|
tl_assert(host_is_big_endian());
|
||||||
|
-#elif defined(VGA_ppc64le)
|
||||||
|
+# elif defined(VGA_ppc64le)
|
||||||
|
tl_assert(host_is_little_endian());
|
||||||
|
-#endif
|
||||||
|
+# endif
|
||||||
|
|
||||||
|
if (sz == 8 || sz == 4) {
|
||||||
|
/* The point of this is to achieve
|
||||||
|
@@ -132,11 +132,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
|
||||||
|
return GOF(GPRn);
|
||||||
|
by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
|
||||||
|
*/
|
||||||
|
-#if defined(VGA_ppc64le)
|
||||||
|
+# if defined(VGA_ppc64le)
|
||||||
|
Int ox = o;
|
||||||
|
-#else
|
||||||
|
+# else
|
||||||
|
Int ox = sz == 8 ? o : (o - 4);
|
||||||
|
-#endif
|
||||||
|
+# endif
|
||||||
|
if (ox == GOF(GPR0)) return ox;
|
||||||
|
if (ox == GOF(GPR1)) return ox;
|
||||||
|
if (ox == GOF(GPR2)) return ox;
|
||||||
|
@@ -240,11 +240,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
|
||||||
|
if (o == GOF(VSR31) && sz == 8) return o;
|
||||||
|
|
||||||
|
/* For the various byte sized XER/CR pieces, use offset 8
|
||||||
|
- in VSR0 .. VSR19. */
|
||||||
|
+ in VSR0 .. VSR21. */
|
||||||
|
tl_assert(SZB(VSR0) == 16);
|
||||||
|
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
|
||||||
|
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
|
||||||
|
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
|
||||||
|
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
|
||||||
|
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
|
||||||
|
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
|
||||||
|
|
||||||
|
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
|
||||||
|
@@ -388,6 +390,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
|
||||||
|
if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
|
||||||
|
if (o == GOF(FPROUND) && sz == 1) return -1;
|
||||||
|
if (o == GOF(DFPROUND) && sz == 1) return -1;
|
||||||
|
+ if (o == GOF(C_FPCC) && sz == 1) return -1;
|
||||||
|
if (o == GOF(VRSAVE) && sz == 4) return -1;
|
||||||
|
if (o == GOF(EMNOTE) && sz == 4) return -1;
|
||||||
|
if (o == GOF(CMSTART) && sz == 4) return -1;
|
||||||
|
@@ -440,11 +443,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
|
||||||
|
if (o == GOF(VSR31) && sz == 8) return o;
|
||||||
|
|
||||||
|
/* For the various byte sized XER/CR pieces, use offset 8
|
||||||
|
- in VSR0 .. VSR19. */
|
||||||
|
+ in VSR0 .. VSR21. */
|
||||||
|
tl_assert(SZB(VSR0) == 16);
|
||||||
|
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
|
||||||
|
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
|
||||||
|
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
|
||||||
|
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
|
||||||
|
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
|
||||||
|
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
|
||||||
|
|
||||||
|
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
|
654
SOURCES/valgrind-3.14.0-jm-vmx-constraints.patch
Normal file
654
SOURCES/valgrind-3.14.0-jm-vmx-constraints.patch
Normal file
@ -0,0 +1,654 @@
|
|||||||
|
commit a0d97e88ec6d71239d30a5a4b2b129e094150873
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Thu Dec 6 20:52:22 2018 +0100
|
||||||
|
|
||||||
|
Bug 401822 Fix asm constraints for ppc64 jm-vmx jm-insns.c test.
|
||||||
|
|
||||||
|
The mfvscr and vor instructions in jm-insns.c had a "=vr" constraint.
|
||||||
|
This should have been an "=v" constraint. This resolved assembler
|
||||||
|
warnings and the testcase failing on ppc64le with gcc 8.2 and
|
||||||
|
binutils 2.30.
|
||||||
|
|
||||||
|
diff --git a/none/tests/ppc32/jm-insns.c b/none/tests/ppc32/jm-insns.c
|
||||||
|
index e1a7da9..be02425 100644
|
||||||
|
--- a/none/tests/ppc32/jm-insns.c
|
||||||
|
+++ b/none/tests/ppc32/jm-insns.c
|
||||||
|
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
|
||||||
|
for (i=0; i<nb_viargs; i++) {
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
vec_in = (vector unsigned int)viargs[i];
|
||||||
|
vec_out = (vector unsigned int){ 0,0,0,0 };
|
||||||
|
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
|
||||||
|
diff --git a/none/tests/ppc64/jm-insns.c b/none/tests/ppc64/jm-insns.c
|
||||||
|
index e1a7da9..be02425 100644
|
||||||
|
--- a/none/tests/ppc64/jm-insns.c
|
||||||
|
+++ b/none/tests/ppc64/jm-insns.c
|
||||||
|
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
|
||||||
|
for (i=0; i<nb_viargs; i++) {
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
vec_in = (vector unsigned int)viargs[i];
|
||||||
|
vec_out = (vector unsigned int){ 0,0,0,0 };
|
||||||
|
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
||||||
|
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
|
||||||
|
|
||||||
|
/* Save flags */
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
|
||||||
|
|
||||||
|
// reset VSCR and CR
|
||||||
|
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
|
||||||
|
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
|
||||||
|
(*func)();
|
||||||
|
|
||||||
|
// retrieve output <- r17
|
||||||
|
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
|
||||||
|
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
|
||||||
|
|
||||||
|
// get CR,VSCR flags
|
||||||
|
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
|
||||||
|
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
|
||||||
|
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
|
||||||
|
|
||||||
|
/* Restore flags */
|
||||||
|
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
|
12
SOURCES/valgrind-3.14.0-mc_translate-vecret.patch
Normal file
12
SOURCES/valgrind-3.14.0-mc_translate-vecret.patch
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
|
||||||
|
index c24db91..1e770b3 100644
|
||||||
|
--- a/memcheck/mc_translate.c
|
||||||
|
+++ b/memcheck/mc_translate.c
|
||||||
|
@@ -8022,6 +8022,7 @@ static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
|
||||||
|
use info. */
|
||||||
|
switch (at->tag) {
|
||||||
|
case Iex_GSPTR:
|
||||||
|
+ case Iex_VECRET:
|
||||||
|
case Iex_Const:
|
||||||
|
return;
|
||||||
|
case Iex_RdTmp: {
|
453
SOURCES/valgrind-3.14.0-memcheck-new-IROps.patch
Normal file
453
SOURCES/valgrind-3.14.0-memcheck-new-IROps.patch
Normal file
@ -0,0 +1,453 @@
|
|||||||
|
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 11:28:42 2018 +0100
|
||||||
|
|
||||||
|
Add Memcheck support for IROps added in 42719898.
|
||||||
|
|
||||||
|
memcheck/mc_translate.c:
|
||||||
|
|
||||||
|
Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.
|
||||||
|
|
||||||
|
doCmpORD: for the cases of a signed comparison against zero, compute
|
||||||
|
definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
|
||||||
|
bits, do it exactly accurately.
|
||||||
|
|
||||||
|
expensiveCountTrailingZeroes: no functional change. Re-analyse/verify and add
|
||||||
|
comments.
|
||||||
|
|
||||||
|
expensiveCountLeadingZeroes: add. Very similar to
|
||||||
|
expensiveCountTrailingZeroes.
|
||||||
|
|
||||||
|
Add some comments to mark unary ops which are self-shadowing.
|
||||||
|
|
||||||
|
Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
|
||||||
|
Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.
|
||||||
|
|
||||||
|
Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.
|
||||||
|
|
||||||
|
memcheck/tests/vbit-test/irops.c
|
||||||
|
|
||||||
|
Add dummy new entries for all new IROps, just enough to make it compile and
|
||||||
|
run.
|
||||||
|
|
||||||
|
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
|
||||||
|
index 68a2ab3..c24db91 100644
|
||||||
|
--- a/memcheck/mc_translate.c
|
||||||
|
+++ b/memcheck/mc_translate.c
|
||||||
|
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
|
||||||
|
return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* --------- The Right-family of operations. --------- */
|
||||||
|
+
|
||||||
|
+/* Unfortunately these are a lot more expensive than their Left
|
||||||
|
+ counterparts. Fortunately they are only very rarely used -- only for
|
||||||
|
+ count-leading-zeroes instrumentation. */
|
||||||
|
+
|
||||||
|
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
|
||||||
|
+{
|
||||||
|
+ for (Int i = 1; i <= 16; i *= 2) {
|
||||||
|
+ // a1 |= (a1 >>u i)
|
||||||
|
+ IRAtom* tmp
|
||||||
|
+ = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
|
||||||
|
+ a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
|
||||||
|
+ }
|
||||||
|
+ return a1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
|
||||||
|
+{
|
||||||
|
+ for (Int i = 1; i <= 32; i *= 2) {
|
||||||
|
+ // a1 |= (a1 >>u i)
|
||||||
|
+ IRAtom* tmp
|
||||||
|
+ = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
|
||||||
|
+ a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
|
||||||
|
+ }
|
||||||
|
+ return a1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* --------- 'Improvement' functions for AND/OR. --------- */
|
||||||
|
|
||||||
|
/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
|
||||||
|
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv* mce,
|
||||||
|
IRAtom* xxhash, IRAtom* yyhash,
|
||||||
|
IRAtom* xx, IRAtom* yy )
|
||||||
|
{
|
||||||
|
- Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
|
||||||
|
- Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
|
||||||
|
- IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
|
||||||
|
- IROp opAND = m64 ? Iop_And64 : Iop_And32;
|
||||||
|
- IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
|
||||||
|
- IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
|
||||||
|
- IRType ty = m64 ? Ity_I64 : Ity_I32;
|
||||||
|
- Int width = m64 ? 64 : 32;
|
||||||
|
+ Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
|
||||||
|
+ Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
|
||||||
|
+ IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
|
||||||
|
+ IROp opAND = m64 ? Iop_And64 : Iop_And32;
|
||||||
|
+ IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
|
||||||
|
+ IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
|
||||||
|
+ IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
|
||||||
|
+ IRType ty = m64 ? Ity_I64 : Ity_I32;
|
||||||
|
+ Int width = m64 ? 64 : 32;
|
||||||
|
|
||||||
|
Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
|
||||||
|
|
||||||
|
- IRAtom* threeLeft1 = NULL;
|
||||||
|
- IRAtom* sevenLeft1 = NULL;
|
||||||
|
-
|
||||||
|
tl_assert(isShadowAtom(mce,xxhash));
|
||||||
|
tl_assert(isShadowAtom(mce,yyhash));
|
||||||
|
tl_assert(isOriginalAtom(mce,xx));
|
||||||
|
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv* mce,
|
||||||
|
/* fancy interpretation */
|
||||||
|
/* if yy is zero, then it must be fully defined (zero#). */
|
||||||
|
tl_assert(isZero(yyhash));
|
||||||
|
- threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
|
||||||
|
+ // This is still inaccurate, but I don't think it matters, since
|
||||||
|
+ // nobody writes code of the form
|
||||||
|
+ // "is <partially-undefined-value> signedly greater than zero?".
|
||||||
|
+ // We therefore simply declare "x >s 0" to be undefined if any bit in
|
||||||
|
+ // x is undefined. That's clearly suboptimal in some cases. Eg, if
|
||||||
|
+ // the highest order bit is a defined 1 then x is negative so it
|
||||||
|
+ // doesn't matter whether the remaining bits are defined or not.
|
||||||
|
+ IRAtom* t_0_gt_0_0
|
||||||
|
+ = assignNew(
|
||||||
|
+ 'V', mce,ty,
|
||||||
|
+ binop(
|
||||||
|
+ opAND,
|
||||||
|
+ mkPCastTo(mce,ty, xxhash),
|
||||||
|
+ m64 ? mkU64(1<<2) : mkU32(1<<2)
|
||||||
|
+ ));
|
||||||
|
+ // For "x <s 0", we can just copy the definedness of the top bit of x
|
||||||
|
+ // and we have a precise result.
|
||||||
|
+ IRAtom* t_lt_0_0_0
|
||||||
|
+ = assignNew(
|
||||||
|
+ 'V', mce,ty,
|
||||||
|
+ binop(
|
||||||
|
+ opSHL,
|
||||||
|
+ assignNew(
|
||||||
|
+ 'V', mce,ty,
|
||||||
|
+ binop(opSHR, xxhash, mkU8(width-1))),
|
||||||
|
+ mkU8(3)
|
||||||
|
+ ));
|
||||||
|
+ // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
|
||||||
|
+ IRAtom* t_0_0_eq_0
|
||||||
|
+ = assignNew(
|
||||||
|
+ 'V', mce,ty,
|
||||||
|
+ binop(
|
||||||
|
+ opSHL,
|
||||||
|
+ assignNew('V', mce,ty,
|
||||||
|
+ unop(
|
||||||
|
+ op1UtoWS,
|
||||||
|
+ expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
|
||||||
|
+ ),
|
||||||
|
+ mkU8(1)
|
||||||
|
+ ));
|
||||||
|
return
|
||||||
|
binop(
|
||||||
|
opOR,
|
||||||
|
- assignNew(
|
||||||
|
- 'V', mce,ty,
|
||||||
|
- binop(
|
||||||
|
- opAND,
|
||||||
|
- mkPCastTo(mce,ty, xxhash),
|
||||||
|
- threeLeft1
|
||||||
|
- )),
|
||||||
|
- assignNew(
|
||||||
|
- 'V', mce,ty,
|
||||||
|
- binop(
|
||||||
|
- opSHL,
|
||||||
|
- assignNew(
|
||||||
|
- 'V', mce,ty,
|
||||||
|
- binop(opSHR, xxhash, mkU8(width-1))),
|
||||||
|
- mkU8(3)
|
||||||
|
- ))
|
||||||
|
- );
|
||||||
|
+ assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
|
||||||
|
+ t_0_0_eq_0
|
||||||
|
+ );
|
||||||
|
} else {
|
||||||
|
/* standard interpretation */
|
||||||
|
- sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
|
||||||
|
+ IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
|
||||||
|
return
|
||||||
|
binop(
|
||||||
|
opAND,
|
||||||
|
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
||||||
|
tl_assert(sameKindedAtoms(atom,vatom));
|
||||||
|
|
||||||
|
switch (czop) {
|
||||||
|
- case Iop_Ctz32:
|
||||||
|
+ case Iop_Ctz32: case Iop_CtzNat32:
|
||||||
|
ty = Ity_I32;
|
||||||
|
xorOp = Iop_Xor32;
|
||||||
|
subOp = Iop_Sub32;
|
||||||
|
andOp = Iop_And32;
|
||||||
|
one = mkU32(1);
|
||||||
|
break;
|
||||||
|
- case Iop_Ctz64:
|
||||||
|
+ case Iop_Ctz64: case Iop_CtzNat64:
|
||||||
|
ty = Ity_I64;
|
||||||
|
xorOp = Iop_Xor64;
|
||||||
|
subOp = Iop_Sub64;
|
||||||
|
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
||||||
|
|
||||||
|
// improver = atom ^ (atom - 1)
|
||||||
|
//
|
||||||
|
- // That is, improver has its low ctz(atom) bits equal to one;
|
||||||
|
- // higher bits (if any) equal to zero.
|
||||||
|
+ // That is, improver has its low ctz(atom)+1 bits equal to one;
|
||||||
|
+ // higher bits (if any) equal to zero. So it's exactly the right
|
||||||
|
+ // mask to use to remove the irrelevant undefined input bits.
|
||||||
|
+ /* Here are some examples:
|
||||||
|
+ atom = U...U 1 0...0
|
||||||
|
+ atom-1 = U...U 0 1...1
|
||||||
|
+ ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
|
||||||
|
+ actually influence the result
|
||||||
|
+ A boundary case
|
||||||
|
+ atom = 0...0
|
||||||
|
+ atom-1 = 1...1
|
||||||
|
+ ^ed = 11111, also a correct mask for the input: all input bits
|
||||||
|
+ are relevant
|
||||||
|
+ Another boundary case
|
||||||
|
+ atom = 1..1 1
|
||||||
|
+ atom-1 = 1..1 0
|
||||||
|
+ ^ed = 0..0 1, also a correct mask: only the rightmost input bit
|
||||||
|
+ is relevant
|
||||||
|
+ Now with misc U bits interspersed:
|
||||||
|
+ atom = U...U 1 0 U...U 0 1 0...0
|
||||||
|
+ atom-1 = U...U 1 0 U...U 0 0 1...1
|
||||||
|
+ ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
|
||||||
|
+ (Per re-check/analysis of 14 Nov 2018)
|
||||||
|
+ */
|
||||||
|
improver = assignNew('V', mce,ty,
|
||||||
|
binop(xorOp,
|
||||||
|
atom,
|
||||||
|
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
||||||
|
|
||||||
|
// improved = vatom & improver
|
||||||
|
//
|
||||||
|
- // That is, treat any V bits above the first ctz(atom) bits as
|
||||||
|
- // "defined".
|
||||||
|
+ // That is, treat any V bits to the left of the rightmost ctz(atom)+1
|
||||||
|
+ // bits as "defined".
|
||||||
|
+ improved = assignNew('V', mce, ty,
|
||||||
|
+ binop(andOp, vatom, improver));
|
||||||
|
+
|
||||||
|
+ // Return pessimizing cast of improved.
|
||||||
|
+ return mkPCastTo(mce, ty, improved);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static
|
||||||
|
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
|
||||||
|
+ IRAtom* atom, IRAtom* vatom )
|
||||||
|
+{
|
||||||
|
+ IRType ty;
|
||||||
|
+ IROp shrOp, notOp, andOp;
|
||||||
|
+ IRAtom* (*mkRight)(MCEnv*, IRAtom*);
|
||||||
|
+ IRAtom *improver, *improved;
|
||||||
|
+ tl_assert(isShadowAtom(mce,vatom));
|
||||||
|
+ tl_assert(isOriginalAtom(mce,atom));
|
||||||
|
+ tl_assert(sameKindedAtoms(atom,vatom));
|
||||||
|
+
|
||||||
|
+ switch (czop) {
|
||||||
|
+ case Iop_Clz32: case Iop_ClzNat32:
|
||||||
|
+ ty = Ity_I32;
|
||||||
|
+ shrOp = Iop_Shr32;
|
||||||
|
+ notOp = Iop_Not32;
|
||||||
|
+ andOp = Iop_And32;
|
||||||
|
+ mkRight = mkRight32;
|
||||||
|
+ break;
|
||||||
|
+ case Iop_Clz64: case Iop_ClzNat64:
|
||||||
|
+ ty = Ity_I64;
|
||||||
|
+ shrOp = Iop_Shr64;
|
||||||
|
+ notOp = Iop_Not64;
|
||||||
|
+ andOp = Iop_And64;
|
||||||
|
+ mkRight = mkRight64;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ ppIROp(czop);
|
||||||
|
+ VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // This is in principle very similar to how expensiveCountTrailingZeroes
|
||||||
|
+ // works. That function computed an "improver", which it used to mask
|
||||||
|
+ // off all but the rightmost 1-bit and the zeroes to the right of it,
|
||||||
|
+ // hence removing irrelevant bits from the input. Here, we play the
|
||||||
|
+ // exact same game but with the left-vs-right roles interchanged.
|
||||||
|
+ // Unfortunately calculation of the improver in this case is
|
||||||
|
+ // significantly more expensive.
|
||||||
|
+ //
|
||||||
|
+ // improver = ~(RIGHT(atom) >>u 1)
|
||||||
|
+ //
|
||||||
|
+ // That is, improver has its upper clz(atom)+1 bits equal to one;
|
||||||
|
+ // lower bits (if any) equal to zero. So it's exactly the right
|
||||||
|
+ // mask to use to remove the irrelevant undefined input bits.
|
||||||
|
+ /* Here are some examples:
|
||||||
|
+ atom = 0...0 1 U...U
|
||||||
|
+ R(atom) = 0...0 1 1...1
|
||||||
|
+ R(atom) >>u 1 = 0...0 0 1...1
|
||||||
|
+ ~(R(atom) >>u 1) = 1...1 1 0...0
|
||||||
|
+ which correctly describes which bits of |atom|
|
||||||
|
+ actually influence the result
|
||||||
|
+ A boundary case
|
||||||
|
+ atom = 0...0
|
||||||
|
+ R(atom) = 0...0
|
||||||
|
+ R(atom) >>u 1 = 0...0
|
||||||
|
+ ~(R(atom) >>u 1) = 1...1
|
||||||
|
+ also a correct mask for the input: all input bits
|
||||||
|
+ are relevant
|
||||||
|
+ Another boundary case
|
||||||
|
+ atom = 1 1..1
|
||||||
|
+ R(atom) = 1 1..1
|
||||||
|
+ R(atom) >>u 1 = 0 1..1
|
||||||
|
+ ~(R(atom) >>u 1) = 1 0..0
|
||||||
|
+ also a correct mask: only the leftmost input bit
|
||||||
|
+ is relevant
|
||||||
|
+ Now with misc U bits interspersed:
|
||||||
|
+ atom = 0...0 1 U...U 0 1 U...U
|
||||||
|
+ R(atom) = 0...0 1 1...1 1 1 1...1
|
||||||
|
+ R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
|
||||||
|
+ ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
|
||||||
|
+ (Per initial implementation of 15 Nov 2018)
|
||||||
|
+ */
|
||||||
|
+ improver = mkRight(mce, atom);
|
||||||
|
+ improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
|
||||||
|
+ improver = assignNew('V', mce, ty, unop(notOp, improver));
|
||||||
|
+
|
||||||
|
+ // improved = vatom & improver
|
||||||
|
+ //
|
||||||
|
+ // That is, treat any V bits to the right of the leftmost clz(atom)+1
|
||||||
|
+ // bits as "defined".
|
||||||
|
improved = assignNew('V', mce, ty,
|
||||||
|
binop(andOp, vatom, improver));
|
||||||
|
|
||||||
|
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_RecipEst32F0x4:
|
||||||
|
return unary32F0x4(mce, vatom);
|
||||||
|
|
||||||
|
+ // These are self-shadowing.
|
||||||
|
case Iop_32UtoV128:
|
||||||
|
case Iop_64UtoV128:
|
||||||
|
case Iop_Dup8x16:
|
||||||
|
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_MulI128by10Carry:
|
||||||
|
case Iop_F16toF64x2:
|
||||||
|
case Iop_F64toF16x2:
|
||||||
|
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
|
||||||
|
return vatom;
|
||||||
|
|
||||||
|
case Iop_I32StoF128: /* signed I32 -> F128 */
|
||||||
|
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_RoundF64toF64_NegINF:
|
||||||
|
case Iop_RoundF64toF64_PosINF:
|
||||||
|
case Iop_RoundF64toF64_ZERO:
|
||||||
|
- case Iop_Clz64:
|
||||||
|
case Iop_D32toD64:
|
||||||
|
case Iop_I32StoD64:
|
||||||
|
case Iop_I32UtoD64:
|
||||||
|
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_D64toD128:
|
||||||
|
return mkPCastTo(mce, Ity_I128, vatom);
|
||||||
|
|
||||||
|
- case Iop_Clz32:
|
||||||
|
case Iop_TruncF64asF32:
|
||||||
|
case Iop_NegF32:
|
||||||
|
case Iop_AbsF32:
|
||||||
|
case Iop_F16toF32:
|
||||||
|
return mkPCastTo(mce, Ity_I32, vatom);
|
||||||
|
|
||||||
|
- case Iop_Ctz32:
|
||||||
|
- case Iop_Ctz64:
|
||||||
|
+ case Iop_Ctz32: case Iop_CtzNat32:
|
||||||
|
+ case Iop_Ctz64: case Iop_CtzNat64:
|
||||||
|
return expensiveCountTrailingZeroes(mce, op, atom, vatom);
|
||||||
|
|
||||||
|
+ case Iop_Clz32: case Iop_ClzNat32:
|
||||||
|
+ case Iop_Clz64: case Iop_ClzNat64:
|
||||||
|
+ return expensiveCountLeadingZeroes(mce, op, atom, vatom);
|
||||||
|
+
|
||||||
|
+ // PopCount32: this is slightly pessimistic. It is true that the
|
||||||
|
+ // result depends on all input bits, so that aspect of the PCast is
|
||||||
|
+ // correct. However, regardless of the input, only the lowest 6 bits
|
||||||
|
+ // out of the output can ever be undefined. So we could actually
|
||||||
|
+ // "improve" the results here by marking the top 26 bits of output as
|
||||||
|
+ // defined. A similar comment applies for PopCount64.
|
||||||
|
+ case Iop_PopCount32:
|
||||||
|
+ return mkPCastTo(mce, Ity_I32, vatom);
|
||||||
|
+ case Iop_PopCount64:
|
||||||
|
+ return mkPCastTo(mce, Ity_I64, vatom);
|
||||||
|
+
|
||||||
|
+ // These are self-shadowing.
|
||||||
|
case Iop_1Uto64:
|
||||||
|
case Iop_1Sto64:
|
||||||
|
case Iop_8Uto64:
|
||||||
|
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_V256to64_2: case Iop_V256to64_3:
|
||||||
|
return assignNew('V', mce, Ity_I64, unop(op, vatom));
|
||||||
|
|
||||||
|
+ // These are self-shadowing.
|
||||||
|
case Iop_64to32:
|
||||||
|
case Iop_64HIto32:
|
||||||
|
case Iop_1Uto32:
|
||||||
|
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_16Sto32:
|
||||||
|
case Iop_8Sto32:
|
||||||
|
case Iop_V128to32:
|
||||||
|
+ case Iop_Reverse8sIn32_x1:
|
||||||
|
return assignNew('V', mce, Ity_I32, unop(op, vatom));
|
||||||
|
|
||||||
|
+ // These are self-shadowing.
|
||||||
|
case Iop_8Sto16:
|
||||||
|
case Iop_8Uto16:
|
||||||
|
case Iop_32to16:
|
||||||
|
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_GetMSBs8x16:
|
||||||
|
return assignNew('V', mce, Ity_I16, unop(op, vatom));
|
||||||
|
|
||||||
|
+ // These are self-shadowing.
|
||||||
|
case Iop_1Uto8:
|
||||||
|
case Iop_1Sto8:
|
||||||
|
case Iop_16to8:
|
||||||
|
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_Not16:
|
||||||
|
case Iop_Not8:
|
||||||
|
case Iop_Not1:
|
||||||
|
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
|
||||||
|
return vatom;
|
||||||
|
|
||||||
|
case Iop_CmpNEZ8x8:
|
||||||
|
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
||||||
|
case Iop_Ctz64x2:
|
||||||
|
return mkPCast64x2(mce, vatom);
|
||||||
|
|
||||||
|
+ // This is self-shadowing.
|
||||||
|
case Iop_PwBitMtxXpose64x2:
|
||||||
|
return assignNew('V', mce, Ity_V128, unop(op, vatom));
|
||||||
|
|
||||||
|
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
|
||||||
|
index bfd82fc..e8bf67d 100644
|
||||||
|
--- a/memcheck/tests/vbit-test/irops.c
|
||||||
|
+++ b/memcheck/tests/vbit-test/irops.c
|
||||||
|
@@ -111,6 +111,12 @@ static irop_t irops[] = {
|
||||||
|
{ DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
||||||
|
{ DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
||||||
|
{ DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
||||||
|
+ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
|
||||||
|
+ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
||||||
|
+ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
||||||
|
+ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
||||||
|
+ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
||||||
|
+ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
||||||
|
{ DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
||||||
|
{ DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
|
||||||
|
{ DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
||||||
|
@@ -336,6 +342,7 @@ static irop_t irops[] = {
|
||||||
|
{ DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
|
||||||
|
{ DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
|
||||||
|
{ DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
|
||||||
|
+ { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
|
||||||
|
/* ------------------ 64-bit SIMD FP ------------------------ */
|
||||||
|
{ DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
|
||||||
|
{ DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },
|
124
SOURCES/valgrind-3.14.0-new-strlen-IROps.patch
Normal file
124
SOURCES/valgrind-3.14.0-new-strlen-IROps.patch
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
commit 4271989815b5fc933c1e29bc75507c2726dc3738
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 10:52:33 2018 +0100
|
||||||
|
|
||||||
|
Add some new IROps to support improved Memcheck analysis of strlen etc.
|
||||||
|
|
||||||
|
This is part of the fix for bug 386945. It adds the following IROps, plus
|
||||||
|
their supporting type- and printing- fragments:
|
||||||
|
|
||||||
|
Iop_Reverse8sIn32_x1: 32-bit byteswap. A fancy name, but it is consistent
|
||||||
|
with naming for the other swapping IROps that already exist.
|
||||||
|
|
||||||
|
Iop_PopCount64, Iop_PopCount32: population count
|
||||||
|
|
||||||
|
Iop_ClzNat64, Iop_ClzNat32, Iop_CtzNat64, Iop_CtzNat32: counting leading and
|
||||||
|
trailing zeroes, with "natural" (Nat) semantics for a zero input, meaning, in
|
||||||
|
the case of zero input, return the number of bits in the word. These
|
||||||
|
functionally overlap with the existing Iop_Clz64, Iop_Clz32, Iop_Ctz64,
|
||||||
|
Iop_Ctz32. The existing operations are undefined in case of a zero input.
|
||||||
|
Adding these new variants avoids the complexity of having to change the
|
||||||
|
declared semantics of the existing operations. Instead they are deprecated
|
||||||
|
but still available for use.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
|
||||||
|
index 823b6be..3221033 100644
|
||||||
|
--- a/VEX/priv/ir_defs.c
|
||||||
|
+++ b/VEX/priv/ir_defs.c
|
||||||
|
@@ -194,6 +194,14 @@ void ppIROp ( IROp op )
|
||||||
|
case Iop_Ctz64: vex_printf("Ctz64"); return;
|
||||||
|
case Iop_Ctz32: vex_printf("Ctz32"); return;
|
||||||
|
|
||||||
|
+ case Iop_ClzNat64: vex_printf("ClzNat64"); return;
|
||||||
|
+ case Iop_ClzNat32: vex_printf("ClzNat32"); return;
|
||||||
|
+ case Iop_CtzNat64: vex_printf("CtzNat64"); return;
|
||||||
|
+ case Iop_CtzNat32: vex_printf("CtzNat32"); return;
|
||||||
|
+
|
||||||
|
+ case Iop_PopCount64: vex_printf("PopCount64"); return;
|
||||||
|
+ case Iop_PopCount32: vex_printf("PopCount32"); return;
|
||||||
|
+
|
||||||
|
case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
|
||||||
|
case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
|
||||||
|
case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
|
||||||
|
@@ -395,6 +403,7 @@ void ppIROp ( IROp op )
|
||||||
|
|
||||||
|
case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
|
||||||
|
case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
|
||||||
|
+ case Iop_Reverse8sIn32_x1: vex_printf("Reverse8sIn32_x1"); return;
|
||||||
|
|
||||||
|
case Iop_CmpF64: vex_printf("CmpF64"); return;
|
||||||
|
|
||||||
|
@@ -2719,6 +2728,7 @@ void typeOfPrimop ( IROp op,
|
||||||
|
UNARY(Ity_I16, Ity_I16);
|
||||||
|
case Iop_Not32:
|
||||||
|
case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
|
||||||
|
+ case Iop_Reverse8sIn32_x1:
|
||||||
|
UNARY(Ity_I32, Ity_I32);
|
||||||
|
|
||||||
|
case Iop_Not64:
|
||||||
|
@@ -2782,9 +2792,13 @@ void typeOfPrimop ( IROp op,
|
||||||
|
BINARY(Ity_I64,Ity_I64, Ity_I128);
|
||||||
|
|
||||||
|
case Iop_Clz32: case Iop_Ctz32:
|
||||||
|
+ case Iop_ClzNat32: case Iop_CtzNat32:
|
||||||
|
+ case Iop_PopCount32:
|
||||||
|
UNARY(Ity_I32, Ity_I32);
|
||||||
|
|
||||||
|
case Iop_Clz64: case Iop_Ctz64:
|
||||||
|
+ case Iop_ClzNat64: case Iop_CtzNat64:
|
||||||
|
+ case Iop_PopCount64:
|
||||||
|
UNARY(Ity_I64, Ity_I64);
|
||||||
|
|
||||||
|
case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
|
||||||
|
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
|
||||||
|
index 17bcb55..93fa5ac 100644
|
||||||
|
--- a/VEX/pub/libvex_ir.h
|
||||||
|
+++ b/VEX/pub/libvex_ir.h
|
||||||
|
@@ -452,12 +452,21 @@ typedef
|
||||||
|
Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
|
||||||
|
Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
|
||||||
|
|
||||||
|
- /* Wierdo integer stuff */
|
||||||
|
+ /* Counting bits */
|
||||||
|
+ /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of zero.
|
||||||
|
+ You must ensure they are never given a zero argument. As of
|
||||||
|
+ 2018-Nov-14 they are deprecated. Try to use the Nat variants
|
||||||
|
+ immediately below, if you can.
|
||||||
|
+ */
|
||||||
|
Iop_Clz64, Iop_Clz32, /* count leading zeroes */
|
||||||
|
Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */
|
||||||
|
- /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
|
||||||
|
- zero. You must ensure they are never given a zero argument.
|
||||||
|
- */
|
||||||
|
+ /* Count leading/trailing zeroes, with "natural" semantics for the
|
||||||
|
+ case where the input is zero: then the result is the number of bits
|
||||||
|
+ in the word. */
|
||||||
|
+ Iop_ClzNat64, Iop_ClzNat32,
|
||||||
|
+ Iop_CtzNat64, Iop_CtzNat32,
|
||||||
|
+ /* Population count -- compute the number of 1 bits in the argument. */
|
||||||
|
+ Iop_PopCount64, Iop_PopCount32,
|
||||||
|
|
||||||
|
/* Standard integer comparisons */
|
||||||
|
Iop_CmpLT32S, Iop_CmpLT64S,
|
||||||
|
@@ -831,6 +840,9 @@ typedef
|
||||||
|
/* MISC (vector integer cmp != 0) */
|
||||||
|
Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
|
||||||
|
|
||||||
|
+ /* Byte swap in a 32-bit word */
|
||||||
|
+ Iop_Reverse8sIn32_x1,
|
||||||
|
+
|
||||||
|
/* ------------------ 64-bit SIMD FP ------------------------ */
|
||||||
|
|
||||||
|
/* Convertion to/from int */
|
||||||
|
@@ -1034,8 +1046,9 @@ typedef
|
||||||
|
Iop_Slice64, // (I64, I64, I8) -> I64
|
||||||
|
|
||||||
|
/* REVERSE the order of chunks in vector lanes. Chunks must be
|
||||||
|
- smaller than the vector lanes (obviously) and so may be 8-,
|
||||||
|
- 16- and 32-bit in size. */
|
||||||
|
+ smaller than the vector lanes (obviously) and so may be 8-, 16- and
|
||||||
|
+ 32-bit in size. Note that the degenerate case,
|
||||||
|
+ Iop_Reverse8sIn64_x1, is simply a vanilla byte-swap. */
|
||||||
|
/* Examples:
|
||||||
|
Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
|
||||||
|
Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
|
256
SOURCES/valgrind-3.14.0-power9-addex.patch
Normal file
256
SOURCES/valgrind-3.14.0-power9-addex.patch
Normal file
@ -0,0 +1,256 @@
|
|||||||
|
From 2c1f016e634bf79faf45e81c14c955c711bc202f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Mon, 31 Dec 2018 22:26:31 +0100
|
||||||
|
Subject: [PATCH] Bug 402519 - POWER 3.0 addex instruction incorrectly
|
||||||
|
implemented
|
||||||
|
|
||||||
|
addex uses OV as carry in and carry out. For all other instructions
|
||||||
|
OV is the signed overflow flag. And instructions like adde use CA
|
||||||
|
as carry.
|
||||||
|
|
||||||
|
Replace set_XER_OV_OV32 with set_XER_OV_OV32_ADDEX, which will
|
||||||
|
call calculate_XER_CA_64 and calculate_XER_CA_32, but with OV
|
||||||
|
as input, and sets OV and OV32.
|
||||||
|
|
||||||
|
Enable test_addex in none/tests/ppc64/test_isa_3_0.c and update
|
||||||
|
the expected output. test_addex would fail to match the expected
|
||||||
|
output before this patch.
|
||||||
|
---
|
||||||
|
NEWS | 1 +
|
||||||
|
VEX/priv/guest_ppc_toIR.c | 52 ++++++++++++++---------
|
||||||
|
none/tests/ppc64/test_isa_3_0.c | 3 +-
|
||||||
|
none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE | 36 ++++++++++------
|
||||||
|
4 files changed, 58 insertions(+), 34 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index 18df822..d685383 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -2645,21 +2645,6 @@ static void copy_OV_to_OV32( void ) {
|
||||||
|
putXER_OV32( getXER_OV() );
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void set_XER_OV_OV32 ( IRType ty, UInt op, IRExpr* res,
|
||||||
|
- IRExpr* argL, IRExpr* argR )
|
||||||
|
-{
|
||||||
|
- if (ty == Ity_I32) {
|
||||||
|
- set_XER_OV_OV32_32( op, res, argL, argR );
|
||||||
|
- } else {
|
||||||
|
- IRExpr* xer_ov_32;
|
||||||
|
- set_XER_OV_64( op, res, argL, argR );
|
||||||
|
- xer_ov_32 = calculate_XER_OV_32( op, unop(Iop_64to32, res),
|
||||||
|
- unop(Iop_64to32, argL),
|
||||||
|
- unop(Iop_64to32, argR));
|
||||||
|
- putXER_OV32( unop(Iop_32to8, xer_ov_32) );
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void set_XER_OV_OV32_SO ( IRType ty, UInt op, IRExpr* res,
|
||||||
|
IRExpr* argL, IRExpr* argR )
|
||||||
|
{
|
||||||
|
@@ -3005,6 +2990,33 @@ static void set_XER_CA_CA32 ( IRType ty, UInt op, IRExpr* res,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Used only by addex instruction, which uses and sets OV as carry. */
|
||||||
|
+static void set_XER_OV_OV32_ADDEX ( IRType ty, IRExpr* res,
|
||||||
|
+ IRExpr* argL, IRExpr* argR,
|
||||||
|
+ IRExpr* old_ov )
|
||||||
|
+{
|
||||||
|
+ if (ty == Ity_I32) {
|
||||||
|
+ IRTemp xer_ov = newTemp(Ity_I32);
|
||||||
|
+ assign ( xer_ov, unop(Iop_32to8,
|
||||||
|
+ calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
|
||||||
|
+ res, argL, argR, old_ov ) ) );
|
||||||
|
+ putXER_OV( mkexpr (xer_ov) );
|
||||||
|
+ putXER_OV32( mkexpr (xer_ov) );
|
||||||
|
+ } else {
|
||||||
|
+ IRExpr *xer_ov;
|
||||||
|
+ IRExpr* xer_ov_32;
|
||||||
|
+ xer_ov = calculate_XER_CA_64( PPCG_FLAG_OP_ADDE,
|
||||||
|
+ res, argL, argR, old_ov );
|
||||||
|
+ putXER_OV( unop(Iop_32to8, xer_ov) );
|
||||||
|
+ xer_ov_32 = calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
|
||||||
|
+ unop(Iop_64to32, res),
|
||||||
|
+ unop(Iop_64to32, argL),
|
||||||
|
+ unop(Iop_64to32, argR),
|
||||||
|
+ unop(Iop_64to32, old_ov) );
|
||||||
|
+ putXER_OV32( unop(Iop_32to8, xer_ov_32) );
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
|
||||||
|
|
||||||
|
/*------------------------------------------------------------*/
|
||||||
|
@@ -5094,16 +5106,18 @@ static Bool dis_int_arith ( UInt theInstr )
|
||||||
|
}
|
||||||
|
|
||||||
|
case 0xAA: {// addex (Add Extended alternate carry bit Z23-form)
|
||||||
|
+ IRTemp old_xer_ov = newTemp(ty);
|
||||||
|
DIP("addex r%u,r%u,r%u,%d\n", rD_addr, rA_addr, rB_addr, (Int)flag_OE);
|
||||||
|
+ assign( old_xer_ov, mkWidenFrom32(ty, getXER_OV_32(), False) );
|
||||||
|
assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
|
||||||
|
binop( mkSzOp(ty, Iop_Add8), mkexpr(rB),
|
||||||
|
- mkWidenFrom8( ty, getXER_OV(), False ) ) ) );
|
||||||
|
+ mkexpr(old_xer_ov) ) ) );
|
||||||
|
|
||||||
|
/* CY bit is same as OE bit */
|
||||||
|
if (flag_OE == 0) {
|
||||||
|
- /* Exception, do not set SO bit */
|
||||||
|
- set_XER_OV_OV32( ty, PPCG_FLAG_OP_ADDE,
|
||||||
|
- mkexpr(rD), mkexpr(rA), mkexpr(rB) );
|
||||||
|
+ /* Exception, do not set SO bit and set OV from carry. */
|
||||||
|
+ set_XER_OV_OV32_ADDEX( ty, mkexpr(rD), mkexpr(rA), mkexpr(rB),
|
||||||
|
+ mkexpr(old_xer_ov) );
|
||||||
|
} else {
|
||||||
|
/* CY=1, 2 and 3 (AKA flag_OE) are reserved */
|
||||||
|
vex_printf("addex instruction, CY = %d is reserved.\n", flag_OE);
|
||||||
|
diff --git a/none/tests/ppc64/test_isa_3_0.c b/none/tests/ppc64/test_isa_3_0.c
|
||||||
|
index 2d13505..1c2cda3 100644
|
||||||
|
--- a/none/tests/ppc64/test_isa_3_0.c
|
||||||
|
+++ b/none/tests/ppc64/test_isa_3_0.c
|
||||||
|
@@ -286,7 +286,7 @@ static test_list_t testgroup_ia_ops_two[] = {
|
||||||
|
{ &test_moduw, "moduw" },
|
||||||
|
{ &test_modsd, "modsd" },
|
||||||
|
{ &test_modud, "modud" },
|
||||||
|
- //{ &test_addex, "addex" },
|
||||||
|
+ { &test_addex, "addex" },
|
||||||
|
{ NULL , NULL },
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -2741,7 +2741,6 @@ static void testfunction_gpr_vector_logical_one (const char* instruction_name,
|
||||||
|
* rt, xa
|
||||||
|
*/
|
||||||
|
int i;
|
||||||
|
- int t;
|
||||||
|
volatile HWord_t res;
|
||||||
|
|
||||||
|
VERBOSE_FUNCTION_CALLOUT
|
||||||
|
diff --git a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
|
||||||
|
index 152ff28..cc0e88e 100644
|
||||||
|
--- a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
|
||||||
|
+++ b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
|
||||||
|
@@ -40,7 +40,17 @@ modud ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000)
|
||||||
|
modud ffffffffffffffff, 0000001cbe991def => 000000043eb0c0b2 (00000000)
|
||||||
|
modud ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
|
||||||
|
|
||||||
|
-All done. Tested 4 different instructions
|
||||||
|
+addex 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
|
||||||
|
+addex 0000000000000000, 0000001cbe991def => 0000001cbe991def (00000000)
|
||||||
|
+addex 0000000000000000, ffffffffffffffff => ffffffffffffffff (00000000)
|
||||||
|
+addex 0000001cbe991def, 0000000000000000 => 0000001cbe991def (00000000)
|
||||||
|
+addex 0000001cbe991def, 0000001cbe991def => 000000397d323bde (00000000) OV32
|
||||||
|
+addex 0000001cbe991def, ffffffffffffffff => 0000001cbe991dee (00000000) OV OV32
|
||||||
|
+addex ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000) OV OV32
|
||||||
|
+addex ffffffffffffffff, 0000001cbe991def => 0000001cbe991def (00000000) OV OV32
|
||||||
|
+addex ffffffffffffffff, ffffffffffffffff => ffffffffffffffff (00000000) OV OV32
|
||||||
|
+
|
||||||
|
+All done. Tested 5 different instructions
|
||||||
|
ppc one argument plus shift:
|
||||||
|
Test instruction group [ppc one argument plus shift]
|
||||||
|
extswsli aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff
|
||||||
|
@@ -85,7 +95,7 @@ extswsli. aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => aaaaaaaaaaaaaa
|
||||||
|
extswsli. 5152535455565758 5152535455565758 0 ffaa5599113377cc => 5152535455565758 5152535455565758 0 ffaa5599113377cc
|
||||||
|
extswsli. 0000000000000000 0000000000000000 0 ffaa5599113377cc => 0000000000000000 0000000000000000 0 ffaa5599113377cc
|
||||||
|
|
||||||
|
-All done. Tested 6 different instructions
|
||||||
|
+All done. Tested 7 different instructions
|
||||||
|
ppc three parameter ops:
|
||||||
|
Test instruction group [ppc three parameter ops]
|
||||||
|
maddhd 0000000000000000, 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
|
||||||
|
@@ -172,7 +182,7 @@ maddld ffffffffffffffff, ffffffffffffffff, 0000000000000000 => 000000000000000
|
||||||
|
maddld ffffffffffffffff, ffffffffffffffff, 0000001cbe991def => 0000001cbe991df0 (00000000)
|
||||||
|
maddld ffffffffffffffff, ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
|
||||||
|
|
||||||
|
-All done. Tested 9 different instructions
|
||||||
|
+All done. Tested 10 different instructions
|
||||||
|
ppc count zeros:
|
||||||
|
Test instruction group [ppc count zeros]
|
||||||
|
cnttzw 0000000000000000 => 0000000000000020
|
||||||
|
@@ -197,7 +207,7 @@ cnttzd. 0000001cbe991def => 0000000000000000 Expected cr0 to be zero, it is (200
|
||||||
|
cnttzd. ffffffffffffffff => 0000000000000000 Expected cr0 to be zero, it is (20000000)
|
||||||
|
|
||||||
|
|
||||||
|
-All done. Tested 13 different instructions
|
||||||
|
+All done. Tested 14 different instructions
|
||||||
|
ppc set boolean:
|
||||||
|
Test instruction group [ppc set boolean]
|
||||||
|
setb cr_field:0 cr_value::00000000 => 0000000000000000
|
||||||
|
@@ -265,7 +275,7 @@ setb cr_field:7 cr_value::00000005 => 0000000000000001
|
||||||
|
setb cr_field:7 cr_value::00000006 => 0000000000000001
|
||||||
|
setb cr_field:7 cr_value::00000007 => 0000000000000001
|
||||||
|
|
||||||
|
-All done. Tested 14 different instructions
|
||||||
|
+All done. Tested 15 different instructions
|
||||||
|
ppc char compare:
|
||||||
|
Test instruction group [ppc char compare]
|
||||||
|
cmprb l=0 0x61 (a) (cmpeq:0x5b427b625a417a61) (cmprb:src22(a-z) src21(A-Z)) => in range/found
|
||||||
|
@@ -1711,7 +1721,7 @@ cmpeqb 0x5d (]) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
|
||||||
|
cmpeqb 0x60 (`) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
|
||||||
|
cmpeqb 0x5f (_) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
|
||||||
|
|
||||||
|
-All done. Tested 17 different instructions
|
||||||
|
+All done. Tested 18 different instructions
|
||||||
|
ppc vector scalar move to/from:
|
||||||
|
Test instruction group [ppc vector scalar move to/from]
|
||||||
|
mfvsrld aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa ffffffffffffffff
|
||||||
|
@@ -1777,7 +1787,7 @@ mtvsrws aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => 113377cc113377cc
|
||||||
|
mtvsrws 5152535455565758 5152535455565758 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
|
||||||
|
mtvsrws 0000000000000000 0000000000000000 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
|
||||||
|
|
||||||
|
-All done. Tested 20 different instructions
|
||||||
|
+All done. Tested 21 different instructions
|
||||||
|
ppc dfp significance:
|
||||||
|
Test instruction group [ppc dfp significance]
|
||||||
|
dtstsfi significance(0x00) +Finite 0 * 10 ^ -12 (GT) (4)
|
||||||
|
@@ -1862,7 +1872,7 @@ dtstsfiq significance(0x20) -inf (GT) (4)
|
||||||
|
dtstsfiq significance(0x30) -inf (GT) (4)
|
||||||
|
dtstsfiq significance(0x3f) -inf (GT) (4)
|
||||||
|
|
||||||
|
-All done. Tested 22 different instructions
|
||||||
|
+All done. Tested 23 different instructions
|
||||||
|
ppc bcd misc:
|
||||||
|
Test instruction group [ppc bcd misc]
|
||||||
|
bcdadd. p0 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000000000000c (+|0) => (EQ) (2) xt:0000000000000000 000000000000000c(+|0)
|
||||||
|
@@ -33338,12 +33348,12 @@ bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:9999999999999999 99999
|
||||||
|
bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000001234567d ( - ) => (GT) (4) xt:0000000000000000 000000305419901f(+|0)
|
||||||
|
|
||||||
|
|
||||||
|
-All done. Tested 51 different instructions
|
||||||
|
+All done. Tested 52 different instructions
|
||||||
|
ppc noop misc:
|
||||||
|
Test instruction group [ppc noop misc]
|
||||||
|
wait =>
|
||||||
|
|
||||||
|
-All done. Tested 52 different instructions
|
||||||
|
+All done. Tested 53 different instructions
|
||||||
|
ppc addpc_misc:
|
||||||
|
Test instruction group [ppc addpc_misc]
|
||||||
|
addpcis 0000000000000000 => 0000000000000000
|
||||||
|
@@ -33380,7 +33390,7 @@ subpcis 000000000000000d => 0000000000000000
|
||||||
|
subpcis 000000000000000e => 0000000000000000
|
||||||
|
subpcis 000000000000000f => 0000000000000000
|
||||||
|
|
||||||
|
-All done. Tested 54 different instructions
|
||||||
|
+All done. Tested 55 different instructions
|
||||||
|
ppc mffpscr:
|
||||||
|
Test instruction group [ppc mffpscr]
|
||||||
|
mffsce => 000000000.000000
|
||||||
|
@@ -33395,7 +33405,7 @@ mffs => 000000000.000000
|
||||||
|
fpscr: f14
|
||||||
|
local_fpscr:
|
||||||
|
|
||||||
|
-All done. Tested 57 different instructions
|
||||||
|
+All done. Tested 58 different instructions
|
||||||
|
ppc mffpscr:
|
||||||
|
Test instruction group [ppc mffpscr]
|
||||||
|
mffscdrni 0 => 0X0
|
||||||
|
@@ -33426,4 +33436,4 @@ mffscrn f15 0X1 => 0X200000000
|
||||||
|
mffscrn f15 0X2 => 0X200000000
|
||||||
|
fpscr: f14 local_fpscr: 30-DRN1 RN-bit62
|
||||||
|
|
||||||
|
-All done. Tested 61 different instructions
|
||||||
|
+All done. Tested 62 different instructions
|
||||||
|
--
|
||||||
|
1.8.3.1
|
||||||
|
|
381
SOURCES/valgrind-3.14.0-ppc-frontend-new-IROps.patch
Normal file
381
SOURCES/valgrind-3.14.0-ppc-frontend-new-IROps.patch
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 11:36:53 2018 +0100
|
||||||
|
|
||||||
|
ppc front end: use new IROps added in 42719898.
|
||||||
|
|
||||||
|
This pertains to bug 386945.
|
||||||
|
|
||||||
|
VEX/priv/guest_ppc_toIR.c:
|
||||||
|
|
||||||
|
gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
|
||||||
|
|
||||||
|
gen_vpopcntd_mode32: use Iop_PopCount32.
|
||||||
|
|
||||||
|
for cntlz{w,d}, use Iop_ClzNat{32,64}; for cnttz{w,d}, use Iop_CtzNat{32,64}.
|
||||||
|
|
||||||
|
gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
|
||||||
|
|
||||||
|
verbose_Clz32: remove (was unused anyway).
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index cb1cae1..8977d4f 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -1595,7 +1595,8 @@ typedef enum {
|
||||||
|
/* Generate an IR sequence to do a popcount operation on the supplied
|
||||||
|
IRTemp, and return a new IRTemp holding the result. 'ty' may be
|
||||||
|
Ity_I32 or Ity_I64 only. */
|
||||||
|
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
|
||||||
|
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
|
||||||
|
+ _popcount_data_type data_type )
|
||||||
|
{
|
||||||
|
/* Do count across 2^data_type bits,
|
||||||
|
byte: data_type = 3
|
||||||
|
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
||||||
|
|
||||||
|
vassert(ty == Ity_I64 || ty == Ity_I32);
|
||||||
|
|
||||||
|
+ // Use a single IROp in cases where we can.
|
||||||
|
+
|
||||||
|
+ if (ty == Ity_I64 && data_type == DWORD) {
|
||||||
|
+ IRTemp res = newTemp(Ity_I64);
|
||||||
|
+ assign(res, unop(Iop_PopCount64, mkexpr(src)));
|
||||||
|
+ return res;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (ty == Ity_I32 && data_type == WORD) {
|
||||||
|
+ IRTemp res = newTemp(Ity_I32);
|
||||||
|
+ assign(res, unop(Iop_PopCount32, mkexpr(src)));
|
||||||
|
+ return res;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // For the rest, we have to do it the slow way.
|
||||||
|
+
|
||||||
|
if (ty == Ity_I32) {
|
||||||
|
|
||||||
|
for (idx = 0; idx < WORD; idx++) {
|
||||||
|
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
||||||
|
return nyu;
|
||||||
|
}
|
||||||
|
|
||||||
|
-// else, ty == Ity_I64
|
||||||
|
+ // else, ty == Ity_I64
|
||||||
|
vassert(mode64);
|
||||||
|
|
||||||
|
for (i = 0; i < DWORD; i++) {
|
||||||
|
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
|
||||||
|
*/
|
||||||
|
static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
|
||||||
|
{
|
||||||
|
- Int i, shift[6];
|
||||||
|
- IRTemp mask[6];
|
||||||
|
- IRTemp old = IRTemp_INVALID;
|
||||||
|
- IRTemp nyu1 = IRTemp_INVALID;
|
||||||
|
- IRTemp nyu2 = IRTemp_INVALID;
|
||||||
|
IRTemp retval = newTemp(Ity_I64);
|
||||||
|
|
||||||
|
vassert(!mode64);
|
||||||
|
|
||||||
|
- for (i = 0; i < WORD; i++) {
|
||||||
|
- mask[i] = newTemp(Ity_I32);
|
||||||
|
- shift[i] = 1 << i;
|
||||||
|
- }
|
||||||
|
- assign(mask[0], mkU32(0x55555555));
|
||||||
|
- assign(mask[1], mkU32(0x33333333));
|
||||||
|
- assign(mask[2], mkU32(0x0F0F0F0F));
|
||||||
|
- assign(mask[3], mkU32(0x00FF00FF));
|
||||||
|
- assign(mask[4], mkU32(0x0000FFFF));
|
||||||
|
- old = src1;
|
||||||
|
- for (i = 0; i < WORD; i++) {
|
||||||
|
- nyu1 = newTemp(Ity_I32);
|
||||||
|
- assign(nyu1,
|
||||||
|
- binop(Iop_Add32,
|
||||||
|
- binop(Iop_And32,
|
||||||
|
- mkexpr(old),
|
||||||
|
- mkexpr(mask[i])),
|
||||||
|
- binop(Iop_And32,
|
||||||
|
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
|
||||||
|
- mkexpr(mask[i]))));
|
||||||
|
- old = nyu1;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- old = src2;
|
||||||
|
- for (i = 0; i < WORD; i++) {
|
||||||
|
- nyu2 = newTemp(Ity_I32);
|
||||||
|
- assign(nyu2,
|
||||||
|
- binop(Iop_Add32,
|
||||||
|
- binop(Iop_And32,
|
||||||
|
- mkexpr(old),
|
||||||
|
- mkexpr(mask[i])),
|
||||||
|
- binop(Iop_And32,
|
||||||
|
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
|
||||||
|
- mkexpr(mask[i]))));
|
||||||
|
- old = nyu2;
|
||||||
|
- }
|
||||||
|
- assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
|
||||||
|
+ assign(retval,
|
||||||
|
+ unop(Iop_32Uto64,
|
||||||
|
+ binop(Iop_Add32,
|
||||||
|
+ unop(Iop_PopCount32, mkexpr(src1)),
|
||||||
|
+ unop(Iop_PopCount32, mkexpr(src2)))));
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
|
||||||
|
rA_address, rS_address);
|
||||||
|
|
||||||
|
assign( rS, getIReg( rS_address ) );
|
||||||
|
- assign( result, unop( Iop_Ctz32,
|
||||||
|
+ assign( result, unop( Iop_CtzNat32,
|
||||||
|
unop( Iop_64to32, mkexpr( rS ) ) ) );
|
||||||
|
assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
|
||||||
|
|
||||||
|
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
|
||||||
|
rA_address, rS_address);
|
||||||
|
|
||||||
|
assign( rS, getIReg( rS_address ) );
|
||||||
|
- assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
|
||||||
|
+ assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
|
||||||
|
|
||||||
|
if ( flag_rC == 1 )
|
||||||
|
set_CR0( mkexpr( rA ) );
|
||||||
|
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
|
||||||
|
IRTemp rS = newTemp(ty);
|
||||||
|
IRTemp rA = newTemp(ty);
|
||||||
|
IRTemp rB = newTemp(ty);
|
||||||
|
- IRExpr* irx;
|
||||||
|
Bool do_rc = False;
|
||||||
|
|
||||||
|
assign( rS, getIReg(rS_addr) );
|
||||||
|
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
|
||||||
|
- IRExpr* lo32;
|
||||||
|
if (rB_addr!=0) {
|
||||||
|
vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
|
||||||
|
return False;
|
||||||
|
}
|
||||||
|
- DIP("cntlzw%s r%u,r%u\n",
|
||||||
|
- flag_rC ? ".":"", rA_addr, rS_addr);
|
||||||
|
+ DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
|
||||||
|
|
||||||
|
// mode64: count in low word only
|
||||||
|
- lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
|
||||||
|
-
|
||||||
|
- // Iop_Clz32 undefined for arg==0, so deal with that case:
|
||||||
|
- irx = binop(Iop_CmpNE32, lo32, mkU32(0));
|
||||||
|
- assign(rA, mkWidenFrom32(ty,
|
||||||
|
- IRExpr_ITE( irx,
|
||||||
|
- unop(Iop_Clz32, lo32),
|
||||||
|
- mkU32(32)),
|
||||||
|
- False));
|
||||||
|
-
|
||||||
|
- // TODO: alternatively: assign(rA, verbose_Clz32(rS));
|
||||||
|
+ IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
|
||||||
|
+ IRExpr* res32 = unop(Iop_ClzNat32, lo32);
|
||||||
|
+ assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
|
||||||
|
vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
|
||||||
|
return False;
|
||||||
|
}
|
||||||
|
- DIP("cntlzd%s r%u,r%u\n",
|
||||||
|
- flag_rC ? ".":"", rA_addr, rS_addr);
|
||||||
|
- // Iop_Clz64 undefined for arg==0, so deal with that case:
|
||||||
|
- irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
|
||||||
|
- assign(rA, IRExpr_ITE( irx,
|
||||||
|
- unop(Iop_Clz64, mkexpr(rS)),
|
||||||
|
- mkU64(64) ));
|
||||||
|
- // TODO: alternatively: assign(rA, verbose_Clz64(rS));
|
||||||
|
+ DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
|
||||||
|
+ assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0x1FC: // cmpb (Power6: compare bytes)
|
||||||
|
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
|
||||||
|
putFReg( rS_addr, mkexpr(frA));
|
||||||
|
return True;
|
||||||
|
}
|
||||||
|
- case 0x1FA: // popcntd (population count doubleword
|
||||||
|
+ case 0x1FA: // popcntd (population count doubleword)
|
||||||
|
{
|
||||||
|
+ vassert(mode64);
|
||||||
|
DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
|
||||||
|
IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
|
||||||
|
putIReg( rA_addr, mkexpr(result) );
|
||||||
|
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
|
||||||
|
static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
|
||||||
|
{
|
||||||
|
vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
|
||||||
|
- return
|
||||||
|
- binop(Iop_Or32,
|
||||||
|
- binop(Iop_Shl32, mkexpr(t), mkU8(24)),
|
||||||
|
- binop(Iop_Or32,
|
||||||
|
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
|
||||||
|
- mkU32(0x00FF0000)),
|
||||||
|
- binop(Iop_Or32,
|
||||||
|
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
|
||||||
|
- mkU32(0x0000FF00)),
|
||||||
|
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
|
||||||
|
- mkU32(0x000000FF) )
|
||||||
|
- )));
|
||||||
|
+ return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Generates code to swap the byte order in the lower half of an Ity_I32,
|
||||||
|
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
|
||||||
|
|
||||||
|
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
|
||||||
|
{
|
||||||
|
+ // JRS FIXME:
|
||||||
|
+ // * is the host_endness conditional below actually necessary?
|
||||||
|
+ // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
|
||||||
|
+ // That would be a lot more efficient.
|
||||||
|
IRExpr * nextAddr;
|
||||||
|
IRTemp w3 = newTemp( Ity_I32 );
|
||||||
|
IRTemp w4 = newTemp( Ity_I32 );
|
||||||
|
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
|
||||||
|
case 0x7C3: // vpopcntd
|
||||||
|
{
|
||||||
|
if (mode64) {
|
||||||
|
- /* Break vector into 64-bit double words and do the population count
|
||||||
|
- * on each double word.
|
||||||
|
+ /* Break vector into 64-bit double words and do the population
|
||||||
|
+ count on each double word.
|
||||||
|
*/
|
||||||
|
IRType ty = Ity_I64;
|
||||||
|
IRTemp bits0_63 = newTemp(Ity_I64);
|
||||||
|
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
|
||||||
|
mkexpr( cnt_bits0_63 ) ) );
|
||||||
|
} else {
|
||||||
|
/* Break vector into 32-bit words and do the population count
|
||||||
|
- * on each doubleword.
|
||||||
|
+ on each 32-bit word.
|
||||||
|
*/
|
||||||
|
IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
|
||||||
|
bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
|
||||||
|
- IRTemp cnt_bits0_63 = newTemp(Ity_I64);
|
||||||
|
+ IRTemp cnt_bits0_63 = newTemp(Ity_I64);
|
||||||
|
IRTemp cnt_bits64_127 = newTemp(Ity_I64);
|
||||||
|
|
||||||
|
DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
|
||||||
|
- breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
|
||||||
|
+ breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
|
||||||
|
+ &bits32_63, &bits0_31 );
|
||||||
|
|
||||||
|
cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63);
|
||||||
|
cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
|
||||||
|
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
|
||||||
|
|
||||||
|
/* Miscellaneous ISA 2.06 instructions */
|
||||||
|
case 0x1FA: // popcntd
|
||||||
|
+ if (!mode64) goto decode_failure;
|
||||||
|
+ /* else fallthru */
|
||||||
|
case 0x17A: // popcntw
|
||||||
|
case 0x7A: // popcntb
|
||||||
|
- if (dis_int_logic( theInstr )) goto decode_success;
|
||||||
|
- goto decode_failure;
|
||||||
|
+ if (dis_int_logic( theInstr )) goto decode_success;
|
||||||
|
+ goto decode_failure;
|
||||||
|
|
||||||
|
case 0x0FC: // bpermd
|
||||||
|
if (!mode64) goto decode_failure;
|
||||||
|
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
|
||||||
|
return dres;
|
||||||
|
}
|
||||||
|
|
||||||
|
-
|
||||||
|
-/*------------------------------------------------------------*/
|
||||||
|
-/*--- Unused stuff ---*/
|
||||||
|
-/*------------------------------------------------------------*/
|
||||||
|
-
|
||||||
|
-///* A potentially more memcheck-friendly implementation of Clz32, with
|
||||||
|
-// the boundary case Clz32(0) = 32, which is what ppc requires. */
|
||||||
|
-//
|
||||||
|
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
|
||||||
|
-//{
|
||||||
|
-// /* Welcome ... to SSA R Us. */
|
||||||
|
-// IRTemp n1 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n2 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n3 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n4 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n5 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n6 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n7 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n8 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n9 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n10 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n11 = newTemp(Ity_I32);
|
||||||
|
-// IRTemp n12 = newTemp(Ity_I32);
|
||||||
|
-//
|
||||||
|
-// /* First, propagate the most significant 1-bit into all lower
|
||||||
|
-// positions in the word. */
|
||||||
|
-// /* unsigned int clz ( unsigned int n )
|
||||||
|
-// {
|
||||||
|
-// n |= (n >> 1);
|
||||||
|
-// n |= (n >> 2);
|
||||||
|
-// n |= (n >> 4);
|
||||||
|
-// n |= (n >> 8);
|
||||||
|
-// n |= (n >> 16);
|
||||||
|
-// return bitcount(~n);
|
||||||
|
-// }
|
||||||
|
-// */
|
||||||
|
-// assign(n1, mkexpr(arg));
|
||||||
|
-// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
|
||||||
|
-// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
|
||||||
|
-// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
|
||||||
|
-// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
|
||||||
|
-// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
|
||||||
|
-// /* This gives a word of the form 0---01---1. Now invert it, giving
|
||||||
|
-// a word of the form 1---10---0, then do a population-count idiom
|
||||||
|
-// (to count the 1s, which is the number of leading zeroes, or 32
|
||||||
|
-// if the original word was 0. */
|
||||||
|
-// assign(n7, unop(Iop_Not32, mkexpr(n6)));
|
||||||
|
-//
|
||||||
|
-// /* unsigned int bitcount ( unsigned int n )
|
||||||
|
-// {
|
||||||
|
-// n = n - ((n >> 1) & 0x55555555);
|
||||||
|
-// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
|
||||||
|
-// n = (n + (n >> 4)) & 0x0F0F0F0F;
|
||||||
|
-// n = n + (n >> 8);
|
||||||
|
-// n = (n + (n >> 16)) & 0x3F;
|
||||||
|
-// return n;
|
||||||
|
-// }
|
||||||
|
-// */
|
||||||
|
-// assign(n8,
|
||||||
|
-// binop(Iop_Sub32,
|
||||||
|
-// mkexpr(n7),
|
||||||
|
-// binop(Iop_And32,
|
||||||
|
-// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
|
||||||
|
-// mkU32(0x55555555))));
|
||||||
|
-// assign(n9,
|
||||||
|
-// binop(Iop_Add32,
|
||||||
|
-// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
|
||||||
|
-// binop(Iop_And32,
|
||||||
|
-// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
|
||||||
|
-// mkU32(0x33333333))));
|
||||||
|
-// assign(n10,
|
||||||
|
-// binop(Iop_And32,
|
||||||
|
-// binop(Iop_Add32,
|
||||||
|
-// mkexpr(n9),
|
||||||
|
-// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
|
||||||
|
-// mkU32(0x0F0F0F0F)));
|
||||||
|
-// assign(n11,
|
||||||
|
-// binop(Iop_Add32,
|
||||||
|
-// mkexpr(n10),
|
||||||
|
-// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
|
||||||
|
-// assign(n12,
|
||||||
|
-// binop(Iop_Add32,
|
||||||
|
-// mkexpr(n11),
|
||||||
|
-// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
|
||||||
|
-// return
|
||||||
|
-// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
|
||||||
|
-//}
|
||||||
|
-
|
||||||
|
/*--------------------------------------------------------------------*/
|
||||||
|
/*--- end guest_ppc_toIR.c ---*/
|
||||||
|
/*--------------------------------------------------------------------*/
|
257
SOURCES/valgrind-3.14.0-ppc-instr-new-IROps.patch
Normal file
257
SOURCES/valgrind-3.14.0-ppc-instr-new-IROps.patch
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
commit 97d336b79e36f6c99d8b07f49ebc9b780e6df84e
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 11:07:37 2018 +0100
|
||||||
|
|
||||||
|
Add ppc host-side isel and instruction support for IROps added in previous commit.
|
||||||
|
|
||||||
|
VEX/priv/host_ppc_defs.c, VEX/priv/host_ppc_defs.h:
|
||||||
|
|
||||||
|
Don't emit cnttz{w,d}. We may need them on a target which doesn't support
|
||||||
|
them. Instead we can generate a fairly reasonable alternative sequence with
|
||||||
|
cntlz{w,d} instead.
|
||||||
|
|
||||||
|
Add support for emitting popcnt{w,d}.
|
||||||
|
|
||||||
|
VEX/priv/host_ppc_isel.c
|
||||||
|
|
||||||
|
Add support for: Iop_ClzNat32 Iop_ClzNat64
|
||||||
|
|
||||||
|
Redo support for: Iop_Ctz{32,64} and their Nat equivalents, so as to not use
|
||||||
|
cnttz{w,d}, as mentioned above.
|
||||||
|
|
||||||
|
Add support for: Iop_PopCount64 Iop_PopCount32 Iop_Reverse8sIn32_x1
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
|
||||||
|
index b073c1d..f4b52e4 100644
|
||||||
|
--- a/VEX/priv/host_ppc_defs.c
|
||||||
|
+++ b/VEX/priv/host_ppc_defs.c
|
||||||
|
@@ -501,9 +501,9 @@ const HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
|
||||||
|
case Pun_NEG: return "neg";
|
||||||
|
case Pun_CLZ32: return "cntlzw";
|
||||||
|
case Pun_CLZ64: return "cntlzd";
|
||||||
|
- case Pun_CTZ32: return "cnttzw";
|
||||||
|
- case Pun_CTZ64: return "cnttzd";
|
||||||
|
case Pun_EXTSW: return "extsw";
|
||||||
|
+ case Pun_POP32: return "popcntw";
|
||||||
|
+ case Pun_POP64: return "popcntd";
|
||||||
|
default: vpanic("showPPCUnaryOp");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -4265,20 +4265,19 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
|
||||||
|
vassert(mode64);
|
||||||
|
p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
|
||||||
|
break;
|
||||||
|
- case Pun_CTZ32: // cnttzw r_dst, r_src
|
||||||
|
- /* Note oder of src and dst is backwards from normal */
|
||||||
|
- p = mkFormX(p, 31, r_src, r_dst, 0, 538, 0, endness_host);
|
||||||
|
- break;
|
||||||
|
- case Pun_CTZ64: // cnttzd r_dst, r_src
|
||||||
|
- /* Note oder of src and dst is backwards from normal */
|
||||||
|
- vassert(mode64);
|
||||||
|
- p = mkFormX(p, 31, r_src, r_dst, 0, 570, 0, endness_host);
|
||||||
|
- break;
|
||||||
|
case Pun_EXTSW: // extsw r_dst, r_src
|
||||||
|
vassert(mode64);
|
||||||
|
p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
|
||||||
|
break;
|
||||||
|
- default: goto bad;
|
||||||
|
+ case Pun_POP32: // popcntw r_dst, r_src
|
||||||
|
+ p = mkFormX(p, 31, r_src, r_dst, 0, 378, 0, endness_host);
|
||||||
|
+ break;
|
||||||
|
+ case Pun_POP64: // popcntd r_dst, r_src
|
||||||
|
+ vassert(mode64);
|
||||||
|
+ p = mkFormX(p, 31, r_src, r_dst, 0, 506, 0, endness_host);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ goto bad;
|
||||||
|
}
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
|
||||||
|
index 17baff5..321fba9 100644
|
||||||
|
--- a/VEX/priv/host_ppc_defs.h
|
||||||
|
+++ b/VEX/priv/host_ppc_defs.h
|
||||||
|
@@ -291,9 +291,9 @@ typedef
|
||||||
|
Pun_NOT,
|
||||||
|
Pun_CLZ32,
|
||||||
|
Pun_CLZ64,
|
||||||
|
- Pun_CTZ32,
|
||||||
|
- Pun_CTZ64,
|
||||||
|
- Pun_EXTSW
|
||||||
|
+ Pun_EXTSW,
|
||||||
|
+ Pun_POP32, // popcntw
|
||||||
|
+ Pun_POP64 // popcntd
|
||||||
|
}
|
||||||
|
PPCUnaryOp;
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
|
||||||
|
index 6bdb5f7..5242176 100644
|
||||||
|
--- a/VEX/priv/host_ppc_isel.c
|
||||||
|
+++ b/VEX/priv/host_ppc_isel.c
|
||||||
|
@@ -2065,12 +2065,15 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
|
||||||
|
return r_dst;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
- case Iop_Clz32:
|
||||||
|
- case Iop_Clz64: {
|
||||||
|
+
|
||||||
|
+ case Iop_Clz32: case Iop_ClzNat32:
|
||||||
|
+ case Iop_Clz64: case Iop_ClzNat64: {
|
||||||
|
+ // cntlz is available even in the most basic (earliest) ppc
|
||||||
|
+ // variants, so it's safe to generate it unconditionally.
|
||||||
|
HReg r_src, r_dst;
|
||||||
|
- PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
|
||||||
|
- Pun_CLZ64;
|
||||||
|
- if (op_unop == Iop_Clz64 && !mode64)
|
||||||
|
+ PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
|
||||||
|
+ ? Pun_CLZ32 : Pun_CLZ64;
|
||||||
|
+ if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
|
||||||
|
goto irreducible;
|
||||||
|
/* Count leading zeroes. */
|
||||||
|
r_dst = newVRegI(env);
|
||||||
|
@@ -2079,18 +2082,133 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
|
||||||
|
return r_dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
- case Iop_Ctz32:
|
||||||
|
- case Iop_Ctz64: {
|
||||||
|
- HReg r_src, r_dst;
|
||||||
|
- PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
|
||||||
|
- Pun_CTZ64;
|
||||||
|
- if (op_unop == Iop_Ctz64 && !mode64)
|
||||||
|
- goto irreducible;
|
||||||
|
- /* Count trailing zeroes. */
|
||||||
|
- r_dst = newVRegI(env);
|
||||||
|
- r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
- addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
|
||||||
|
- return r_dst;
|
||||||
|
+ //case Iop_Ctz32:
|
||||||
|
+ case Iop_CtzNat32:
|
||||||
|
+ //case Iop_Ctz64:
|
||||||
|
+ case Iop_CtzNat64:
|
||||||
|
+ {
|
||||||
|
+ // Generate code using Clz, because we can't assume the host has
|
||||||
|
+ // Ctz. In particular, part of the fix for bug 386945 involves
|
||||||
|
+ // creating a Ctz in ir_opt.c from smaller fragments.
|
||||||
|
+ PPCUnaryOp op_clz = Pun_CLZ64;
|
||||||
|
+ Int WS = 64;
|
||||||
|
+ if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
|
||||||
|
+ op_clz = Pun_CLZ32;
|
||||||
|
+ WS = 32;
|
||||||
|
+ }
|
||||||
|
+ /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
|
||||||
|
+ t1 = arg - 1
|
||||||
|
+ t2 = not arg
|
||||||
|
+ t2 = t2 & t1
|
||||||
|
+ t2 = clz t2
|
||||||
|
+ t1 = WS
|
||||||
|
+ t2 = t1 - t2
|
||||||
|
+ // result in t2
|
||||||
|
+ */
|
||||||
|
+ HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
+ HReg t1 = newVRegI(env);
|
||||||
|
+ HReg t2 = newVRegI(env);
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
|
||||||
|
+ addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
|
||||||
|
+ return t2;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ case Iop_PopCount64: {
|
||||||
|
+ // popcnt{w,d} is only available in later arch revs (ISA 3.0,
|
||||||
|
+ // maybe) so it's not really correct to emit it here without a caps
|
||||||
|
+ // check for the host.
|
||||||
|
+ if (mode64) {
|
||||||
|
+ HReg r_dst = newVRegI(env);
|
||||||
|
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
|
||||||
|
+ return r_dst;
|
||||||
|
+ }
|
||||||
|
+ // We don't expect to be required to handle this in 32-bit mode.
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ case Iop_PopCount32: {
|
||||||
|
+ // Similar comment as for PopCount64 just above applies -- we really
|
||||||
|
+ // should have a caps check here.
|
||||||
|
+
|
||||||
|
+ HReg r_dst = newVRegI(env);
|
||||||
|
+ // This actually generates popcntw, which in 64 bit mode does a
|
||||||
|
+ // 32-bit count individually for both low and high halves of the
|
||||||
|
+ // word. Per the comment at the top of iselIntExpr_R, in the 64
|
||||||
|
+ // bit mode case, the user of this result is required to ignore
|
||||||
|
+ // the upper 32 bits of the result. In 32 bit mode this is all
|
||||||
|
+ // moot. It is however unclear from the PowerISA 3.0 docs that
|
||||||
|
+ // the instruction exists in 32 bit mode; however our own front
|
||||||
|
+ // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
|
||||||
|
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
|
||||||
|
+ return r_dst;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ case Iop_Reverse8sIn32_x1: {
|
||||||
|
+ // A bit of a mouthful, but simply .. 32-bit byte swap.
|
||||||
|
+ // This is pretty rubbish code. We could do vastly better if
|
||||||
|
+ // rotates, and better, rotate-inserts, were allowed. Note that
|
||||||
|
+ // even on a 64 bit target, the right shifts must be done as 32-bit
|
||||||
|
+ // so as to introduce zero bits in the right places. So it seems
|
||||||
|
+ // simplest to do the whole sequence in 32-bit insns.
|
||||||
|
+ /*
|
||||||
|
+ r = <argument> // working temporary, initial byte order ABCD
|
||||||
|
+ Mask = 00FF00FF
|
||||||
|
+ nMask = not Mask
|
||||||
|
+ tHi = and r, Mask
|
||||||
|
+ tHi = shl tHi, 8
|
||||||
|
+ tLo = and r, nMask
|
||||||
|
+ tLo = shr tLo, 8
|
||||||
|
+ r = or tHi, tLo // now r has order BADC
|
||||||
|
+ and repeat for 16 bit chunks ..
|
||||||
|
+ Mask = 0000FFFF
|
||||||
|
+ nMask = not Mask
|
||||||
|
+ tHi = and r, Mask
|
||||||
|
+ tHi = shl tHi, 16
|
||||||
|
+ tLo = and r, nMask
|
||||||
|
+ tLo = shr tLo, 16
|
||||||
|
+ r = or tHi, tLo // now r has order DCBA
|
||||||
|
+ */
|
||||||
|
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
+ HReg rr = newVRegI(env);
|
||||||
|
+ HReg rMask = newVRegI(env);
|
||||||
|
+ HReg rnMask = newVRegI(env);
|
||||||
|
+ HReg rtHi = newVRegI(env);
|
||||||
|
+ HReg rtLo = newVRegI(env);
|
||||||
|
+ // Copy r_src since we need to modify it
|
||||||
|
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
|
||||||
|
+ // Swap within 16-bit lanes
|
||||||
|
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
|
||||||
|
+ False/* !64bit imm*/));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
|
||||||
|
+ rtHi, rtHi,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 8)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
|
||||||
|
+ rtLo, rtLo,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 8)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
|
||||||
|
+ // And now swap the two 16-bit chunks
|
||||||
|
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
|
||||||
|
+ False/* !64bit imm*/));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
|
||||||
|
+ rtHi, rtHi,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 16)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
|
||||||
|
+ rtLo, rtLo,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 16)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
|
||||||
|
+ return rr;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Iop_Left8:
|
130
SOURCES/valgrind-3.14.0-ppc64-ldbrx.patch
Normal file
130
SOURCES/valgrind-3.14.0-ppc64-ldbrx.patch
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Fri Dec 7 10:42:22 2018 -0500
|
||||||
|
|
||||||
|
Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
|
||||||
|
|
||||||
|
This makes it possible for memcheck to analyse the new gcc strcmp
|
||||||
|
inlined code correctly even if the ldbrx load is partly beyond an
|
||||||
|
addressable block.
|
||||||
|
|
||||||
|
Partially resolves bug 386945.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index 8977d4f..a81dace 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
|
||||||
|
|
||||||
|
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
|
||||||
|
{
|
||||||
|
- // JRS FIXME:
|
||||||
|
- // * is the host_endness conditional below actually necessary?
|
||||||
|
- // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
|
||||||
|
- // That would be a lot more efficient.
|
||||||
|
- IRExpr * nextAddr;
|
||||||
|
- IRTemp w3 = newTemp( Ity_I32 );
|
||||||
|
- IRTemp w4 = newTemp( Ity_I32 );
|
||||||
|
- DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
|
||||||
|
- assign( w1, load( Ity_I32, mkexpr( EA ) ) );
|
||||||
|
- assign( w2, gen_byterev32( w1 ) );
|
||||||
|
- nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
|
||||||
|
- ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
|
||||||
|
- assign( w3, load( Ity_I32, nextAddr ) );
|
||||||
|
- assign( w4, gen_byterev32( w3 ) );
|
||||||
|
- if (host_endness == VexEndnessLE)
|
||||||
|
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
|
||||||
|
+ /* Caller makes sure we are only called in mode64. */
|
||||||
|
+
|
||||||
|
+ /* If we supported swapping LE/BE loads in the backend then we could
|
||||||
|
+ just load the value with the bytes reversed by doing a BE load
|
||||||
|
+ on an LE machine and a LE load on a BE machine.
|
||||||
|
+
|
||||||
|
+ IRTemp dw1 = newTemp(Ity_I64);
|
||||||
|
+ if (host_endness == VexEndnessBE)
|
||||||
|
+ assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
|
||||||
|
else
|
||||||
|
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
|
||||||
|
+ assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
|
||||||
|
+ putIReg( rD_addr, mkexpr(dw1) );
|
||||||
|
+
|
||||||
|
+ But since we currently don't we load the value as is and then
|
||||||
|
+ switch it around with Iop_Reverse8sIn64_x1. */
|
||||||
|
+
|
||||||
|
+ IRTemp dw1 = newTemp(Ity_I64);
|
||||||
|
+ IRTemp dw2 = newTemp(Ity_I64);
|
||||||
|
+ DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
|
||||||
|
+ assign( dw1, load(Ity_I64, mkexpr(EA)) );
|
||||||
|
+ assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
|
||||||
|
+ putIReg( rD_addr, mkexpr(dw2) );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
|
||||||
|
index 750cf8d..4fc3eb5 100644
|
||||||
|
--- a/VEX/priv/host_ppc_isel.c
|
||||||
|
+++ b/VEX/priv/host_ppc_isel.c
|
||||||
|
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
|
||||||
|
return rr;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ case Iop_Reverse8sIn64_x1: {
|
||||||
|
+ /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
|
||||||
|
+ Can only be used in 64bit mode. */
|
||||||
|
+ vassert (mode64);
|
||||||
|
+
|
||||||
|
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
|
||||||
|
+ HReg rr = newVRegI(env);
|
||||||
|
+ HReg rMask = newVRegI(env);
|
||||||
|
+ HReg rnMask = newVRegI(env);
|
||||||
|
+ HReg rtHi = newVRegI(env);
|
||||||
|
+ HReg rtLo = newVRegI(env);
|
||||||
|
+
|
||||||
|
+ // Copy r_src since we need to modify it
|
||||||
|
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
|
||||||
|
+
|
||||||
|
+ // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
|
||||||
|
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
|
||||||
|
+ True/* 64bit imm*/));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
|
||||||
|
+ rtHi, rtHi,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 8)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
|
||||||
|
+ rtLo, rtLo,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 8)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
|
||||||
|
+
|
||||||
|
+ // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
|
||||||
|
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
|
||||||
|
+ True/* 64bit imm*/));
|
||||||
|
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
|
||||||
|
+ rtHi, rtHi,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 16)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
|
||||||
|
+ rtLo, rtLo,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 16)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
|
||||||
|
+
|
||||||
|
+ // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
|
||||||
|
+ /* We don't need to mask anymore, just two more shifts and an or. */
|
||||||
|
+ addInstr(env, mk_iMOVds_RR(rtLo, rr));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
|
||||||
|
+ rtLo, rtLo,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 32)));
|
||||||
|
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
|
||||||
|
+ rr, rr,
|
||||||
|
+ PPCRH_Imm(False/*!signed imm*/, 32)));
|
||||||
|
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
|
||||||
|
+
|
||||||
|
+ return rr;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
case Iop_Left8:
|
||||||
|
case Iop_Left16:
|
||||||
|
case Iop_Left32:
|
88
SOURCES/valgrind-3.14.0-ppc64-lxvb16x.patch
Normal file
88
SOURCES/valgrind-3.14.0-ppc64-lxvb16x.patch
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
commit 5c00e04a1b61475a7f731f8cfede114201815e0a
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Sun Dec 9 23:25:05 2018 +0100
|
||||||
|
|
||||||
|
Implement ppc64 lxvb16x as 128-bit vector load with reversed double words.
|
||||||
|
|
||||||
|
This makes it possible for memcheck to know which part of the 128bit
|
||||||
|
vector is defined, even if the load is partly beyond an addressable block.
|
||||||
|
|
||||||
|
Partially resolves bug 386945.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index 7af4973..ec2f90a 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -20702,54 +20702,29 @@ dis_vx_load ( UInt theInstr )
|
||||||
|
{
|
||||||
|
DIP("lxvb16x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
|
||||||
|
|
||||||
|
- IRTemp byte[16];
|
||||||
|
- int i;
|
||||||
|
- UInt ea_off = 0;
|
||||||
|
- IRExpr* irx_addr;
|
||||||
|
- IRTemp tmp_low[9];
|
||||||
|
- IRTemp tmp_hi[9];
|
||||||
|
+ /* The result of lxvb16x should be the same on big and little
|
||||||
|
+ endian systems. We do a host load, then reverse the bytes in
|
||||||
|
+ the double words. If the host load was little endian we swap
|
||||||
|
+ them around again. */
|
||||||
|
|
||||||
|
- tmp_low[0] = newTemp( Ity_I64 );
|
||||||
|
- tmp_hi[0] = newTemp( Ity_I64 );
|
||||||
|
- assign( tmp_low[0], mkU64( 0 ) );
|
||||||
|
- assign( tmp_hi[0], mkU64( 0 ) );
|
||||||
|
-
|
||||||
|
- for ( i = 0; i < 8; i++ ) {
|
||||||
|
- byte[i] = newTemp( Ity_I64 );
|
||||||
|
- tmp_low[i+1] = newTemp( Ity_I64 );
|
||||||
|
-
|
||||||
|
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
|
||||||
|
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
|
||||||
|
- ea_off += 1;
|
||||||
|
-
|
||||||
|
- assign( byte[i], binop( Iop_Shl64,
|
||||||
|
- unop( Iop_8Uto64,
|
||||||
|
- load( Ity_I8, irx_addr ) ),
|
||||||
|
- mkU8( 8 * ( 7 - i ) ) ) );
|
||||||
|
+ IRTemp high = newTemp(Ity_I64);
|
||||||
|
+ IRTemp high_rev = newTemp(Ity_I64);
|
||||||
|
+ IRTemp low = newTemp(Ity_I64);
|
||||||
|
+ IRTemp low_rev = newTemp(Ity_I64);
|
||||||
|
|
||||||
|
- assign( tmp_low[i+1],
|
||||||
|
- binop( Iop_Or64,
|
||||||
|
- mkexpr( byte[i] ), mkexpr( tmp_low[i] ) ) );
|
||||||
|
- }
|
||||||
|
+ IRExpr *t128 = load( Ity_V128, mkexpr( EA ) );
|
||||||
|
|
||||||
|
- for ( i = 0; i < 8; i++ ) {
|
||||||
|
- byte[i + 8] = newTemp( Ity_I64 );
|
||||||
|
- tmp_hi[i+1] = newTemp( Ity_I64 );
|
||||||
|
+ assign( high, unop(Iop_V128HIto64, t128) );
|
||||||
|
+ assign( high_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(high)) );
|
||||||
|
+ assign( low, unop(Iop_V128to64, t128) );
|
||||||
|
+ assign( low_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(low)) );
|
||||||
|
|
||||||
|
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
|
||||||
|
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
|
||||||
|
- ea_off += 1;
|
||||||
|
+ if (host_endness == VexEndnessLE)
|
||||||
|
+ t128 = binop( Iop_64HLtoV128, mkexpr (low_rev), mkexpr (high_rev) );
|
||||||
|
+ else
|
||||||
|
+ t128 = binop( Iop_64HLtoV128, mkexpr (high_rev), mkexpr (low_rev) );
|
||||||
|
|
||||||
|
- assign( byte[i+8], binop( Iop_Shl64,
|
||||||
|
- unop( Iop_8Uto64,
|
||||||
|
- load( Ity_I8, irx_addr ) ),
|
||||||
|
- mkU8( 8 * ( 7 - i ) ) ) );
|
||||||
|
- assign( tmp_hi[i+1], binop( Iop_Or64,
|
||||||
|
- mkexpr( byte[i+8] ),
|
||||||
|
- mkexpr( tmp_hi[i] ) ) );
|
||||||
|
- }
|
||||||
|
- putVSReg( XT, binop( Iop_64HLtoV128,
|
||||||
|
- mkexpr( tmp_low[8] ), mkexpr( tmp_hi[8] ) ) );
|
||||||
|
+ putVSReg( XT, t128 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
47
SOURCES/valgrind-3.14.0-ppc64-lxvd2x.patch
Normal file
47
SOURCES/valgrind-3.14.0-ppc64-lxvd2x.patch
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
commit b7d65cab4f3e9a6f66a496e723e53ed736c4d2e7
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Sun Dec 9 00:55:42 2018 +0100
|
||||||
|
|
||||||
|
Implement ppc64 lxvd2x as 128-bit load with double word swap for ppc64le.
|
||||||
|
|
||||||
|
This makes it possible for memcheck to know which part of the 128bit
|
||||||
|
vector is defined, even if the load is partly beyond an addressable block.
|
||||||
|
|
||||||
|
Partially resolves bug 386945.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index a81dace..7af4973 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -20590,16 +20590,22 @@ dis_vx_load ( UInt theInstr )
|
||||||
|
}
|
||||||
|
case 0x34C: // lxvd2x
|
||||||
|
{
|
||||||
|
- IROp addOp = ty == Ity_I64 ? Iop_Add64 : Iop_Add32;
|
||||||
|
- IRExpr * high, *low;
|
||||||
|
- ULong ea_off = 8;
|
||||||
|
- IRExpr* high_addr;
|
||||||
|
+ IRExpr *t128;
|
||||||
|
DIP("lxvd2x %d,r%u,r%u\n", XT, rA_addr, rB_addr);
|
||||||
|
- high = load( Ity_I64, mkexpr( EA ) );
|
||||||
|
- high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
|
||||||
|
- : mkU32( ea_off ) );
|
||||||
|
- low = load( Ity_I64, high_addr );
|
||||||
|
- putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
|
||||||
|
+ t128 = load( Ity_V128, mkexpr( EA ) );
|
||||||
|
+
|
||||||
|
+ /* The data in the vec register should be in big endian order.
|
||||||
|
+ So if we just did a little endian load then swap around the
|
||||||
|
+ high and low double words. */
|
||||||
|
+ if (host_endness == VexEndnessLE) {
|
||||||
|
+ IRTemp high = newTemp(Ity_I64);
|
||||||
|
+ IRTemp low = newTemp(Ity_I64);
|
||||||
|
+ assign( high, unop(Iop_V128HIto64, t128) );
|
||||||
|
+ assign( low, unop(Iop_V128to64, t128) );
|
||||||
|
+ t128 = binop( Iop_64HLtoV128, mkexpr (low), mkexpr (high) );
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ putVSReg( XT, t128 );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 0x14C: // lxvdsx
|
111
SOURCES/valgrind-3.14.0-ppc64-ptrace.patch
Normal file
111
SOURCES/valgrind-3.14.0-ppc64-ptrace.patch
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
commit 3967a99c26e8b314634a6b1fd8927cbb2bb5d060
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Wed Dec 12 14:11:29 2018 +0100
|
||||||
|
|
||||||
|
Implement minimal ptrace support for ppc64[le]-linux.
|
||||||
|
|
||||||
|
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
|
||||||
|
index 6549dd1..0fdcc8e 100644
|
||||||
|
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
|
||||||
|
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
|
||||||
|
@@ -388,6 +388,7 @@ DECL_TEMPLATE(ppc64_linux, sys_mmap);
|
||||||
|
//zz DECL_TEMPLATE(ppc64_linux, sys_sigreturn);
|
||||||
|
DECL_TEMPLATE(ppc64_linux, sys_rt_sigreturn);
|
||||||
|
DECL_TEMPLATE(ppc64_linux, sys_fadvise64);
|
||||||
|
+DECL_TEMPLATE(ppc64_linux, sys_ptrace);
|
||||||
|
|
||||||
|
PRE(sys_mmap)
|
||||||
|
{
|
||||||
|
@@ -511,6 +512,72 @@ PRE(sys_rt_sigreturn)
|
||||||
|
*flags |= SfPollAfter;
|
||||||
|
}
|
||||||
|
|
||||||
|
+// ARG3 is only used for pointers into the traced process's address
|
||||||
|
+// space and for offsets into the traced process's struct
|
||||||
|
+// user_regs_struct. It is never a pointer into this process's memory
|
||||||
|
+// space, and we should therefore not check anything it points to.
|
||||||
|
+// powerpc does have other ways to get/set registers, we only support
|
||||||
|
+// GET/SETREGSET for now.
|
||||||
|
+PRE(sys_ptrace)
|
||||||
|
+{
|
||||||
|
+ PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
|
||||||
|
+ PRE_REG_READ4(int, "ptrace",
|
||||||
|
+ long, request, long, pid, long, addr, long, data);
|
||||||
|
+ switch (ARG1) {
|
||||||
|
+ case VKI_PTRACE_PEEKTEXT:
|
||||||
|
+ case VKI_PTRACE_PEEKDATA:
|
||||||
|
+ case VKI_PTRACE_PEEKUSR:
|
||||||
|
+ PRE_MEM_WRITE( "ptrace(peek)", ARG4,
|
||||||
|
+ sizeof (long));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETEVENTMSG:
|
||||||
|
+ PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETSIGINFO:
|
||||||
|
+ PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_SETSIGINFO:
|
||||||
|
+ PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETREGSET:
|
||||||
|
+ ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_SETREGSET:
|
||||||
|
+ ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+POST(sys_ptrace)
|
||||||
|
+{
|
||||||
|
+ switch (ARG1) {
|
||||||
|
+ case VKI_PTRACE_TRACEME:
|
||||||
|
+ ML_(linux_POST_traceme)(tid);
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_PEEKTEXT:
|
||||||
|
+ case VKI_PTRACE_PEEKDATA:
|
||||||
|
+ case VKI_PTRACE_PEEKUSR:
|
||||||
|
+ POST_MEM_WRITE( ARG4, sizeof (long));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETEVENTMSG:
|
||||||
|
+ POST_MEM_WRITE( ARG4, sizeof(unsigned long));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETSIGINFO:
|
||||||
|
+ /* XXX: This is a simplification. Different parts of the
|
||||||
|
+ * siginfo_t are valid depending on the type of signal.
|
||||||
|
+ */
|
||||||
|
+ POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
|
||||||
|
+ break;
|
||||||
|
+ case VKI_PTRACE_GETREGSET:
|
||||||
|
+ ML_(linux_POST_getregset)(tid, ARG3, ARG4);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#undef PRE
|
||||||
|
#undef POST
|
||||||
|
|
||||||
|
@@ -562,8 +629,7 @@ static SyscallTableEntry syscall_table[] = {
|
||||||
|
GENX_(__NR_getuid, sys_getuid), // 24
|
||||||
|
|
||||||
|
// _____(__NR_stime, sys_stime), // 25
|
||||||
|
-// When ptrace is supported, memcheck/tests/linux/getregset should be enabled
|
||||||
|
-// _____(__NR_ptrace, sys_ptrace), // 26
|
||||||
|
+ PLAXY(__NR_ptrace, sys_ptrace), // 26
|
||||||
|
GENX_(__NR_alarm, sys_alarm), // 27
|
||||||
|
// _____(__NR_oldfstat, sys_oldfstat), // 28
|
||||||
|
GENX_(__NR_pause, sys_pause), // 29
|
||||||
|
diff --git a/memcheck/tests/linux/getregset.vgtest b/memcheck/tests/linux/getregset.vgtest
|
||||||
|
index 4c66108..c35be4c 100644
|
||||||
|
--- a/memcheck/tests/linux/getregset.vgtest
|
||||||
|
+++ b/memcheck/tests/linux/getregset.vgtest
|
||||||
|
@@ -1,4 +1,4 @@
|
||||||
|
prog: getregset
|
||||||
|
vgopts: -q
|
||||||
|
-prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 ) && ! ../../../tests/arch_test ppc64
|
||||||
|
+prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 )
|
||||||
|
|
28
SOURCES/valgrind-3.14.0-ppc64-unaligned-vecs.patch
Normal file
28
SOURCES/valgrind-3.14.0-ppc64-unaligned-vecs.patch
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
commit 321771ee63740333ad355244e0764295218843b8
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Sun Dec 9 14:26:39 2018 +0100
|
||||||
|
|
||||||
|
memcheck: Allow unaligned loads of 128bit vectors on ppc64[le].
|
||||||
|
|
||||||
|
On powerpc partial unaligned loads of vectors from partially invalid
|
||||||
|
addresses are OK and could be generated by our translation of lxvd2x.
|
||||||
|
|
||||||
|
Adjust partial_load memcheck tests to allow partial loads of 16 byte
|
||||||
|
vectors on powerpc64.
|
||||||
|
|
||||||
|
Part of resolving bug #386945.
|
||||||
|
|
||||||
|
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
|
||||||
|
index 737f79d..101916b 100644
|
||||||
|
--- a/memcheck/mc_main.c
|
||||||
|
+++ b/memcheck/mc_main.c
|
||||||
|
@@ -1354,6 +1354,9 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
|
||||||
|
tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
|
||||||
|
/* OK if all loaded bytes are from the same page. */
|
||||||
|
Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
|
||||||
|
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
|
||||||
|
+ /* lxvd2x might generate an unaligned 128 bit vector load. */
|
||||||
|
+ Bool alignedOK = (szB == 16);
|
||||||
|
# else
|
||||||
|
/* OK if the address is aligned by the load size. */
|
||||||
|
Bool alignedOK = (0 == (a & (szB - 1)));
|
148
SOURCES/valgrind-3.14.0-ppc64-unaligned-words.patch
Normal file
148
SOURCES/valgrind-3.14.0-ppc64-unaligned-words.patch
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
commit c5a5bea00af75f6ac50da10967d956f117b956f1
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Sat Dec 8 13:47:43 2018 -0500
|
||||||
|
|
||||||
|
memcheck: Allow unaligned loads of words on ppc64[le].
|
||||||
|
|
||||||
|
On powerpc partial unaligned loads of words from partially invalid
|
||||||
|
addresses are OK and could be generated by our translation of ldbrx.
|
||||||
|
|
||||||
|
Adjust partial_load memcheck tests to allow partial loads of words
|
||||||
|
on powerpc64.
|
||||||
|
|
||||||
|
Part of resolving bug #386945.
|
||||||
|
|
||||||
|
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
|
||||||
|
index 3ef7cb9..737f79d 100644
|
||||||
|
--- a/memcheck/mc_main.c
|
||||||
|
+++ b/memcheck/mc_main.c
|
||||||
|
@@ -1508,6 +1508,9 @@ ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
|
||||||
|
# if defined(VGA_mips64) && defined(VGABI_N32)
|
||||||
|
if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
|
||||||
|
&& n_addrs_bad < VG_WORDSIZE * 2)
|
||||||
|
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
|
||||||
|
+ /* On power unaligned loads of words are OK. */
|
||||||
|
+ if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
|
||||||
|
# else
|
||||||
|
if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
|
||||||
|
&& n_addrs_bad < VG_WORDSIZE)
|
||||||
|
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
|
||||||
|
index 2af4dd1..70b8ada 100644
|
||||||
|
--- a/memcheck/tests/Makefile.am
|
||||||
|
+++ b/memcheck/tests/Makefile.am
|
||||||
|
@@ -235,8 +235,10 @@ EXTRA_DIST = \
|
||||||
|
partiallydefinedeq.stdout.exp \
|
||||||
|
partial_load_ok.vgtest partial_load_ok.stderr.exp \
|
||||||
|
partial_load_ok.stderr.exp64 \
|
||||||
|
+ partial_load_ok.stderr.exp-ppc64 \
|
||||||
|
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
|
||||||
|
partial_load_dflt.stderr.exp64 \
|
||||||
|
+ partial_load_dflt.stderr.exp-ppc64 \
|
||||||
|
partial_load_dflt.stderr.expr-s390x-mvc \
|
||||||
|
pdb-realloc.stderr.exp pdb-realloc.vgtest \
|
||||||
|
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \
|
||||||
|
diff --git a/memcheck/tests/partial_load.c b/memcheck/tests/partial_load.c
|
||||||
|
index 0b2f10b..685ca8d 100644
|
||||||
|
--- a/memcheck/tests/partial_load.c
|
||||||
|
+++ b/memcheck/tests/partial_load.c
|
||||||
|
@@ -1,14 +1,14 @@
|
||||||
|
-
|
||||||
|
+#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int main ( void )
|
||||||
|
{
|
||||||
|
- long w;
|
||||||
|
- int i;
|
||||||
|
- char* p;
|
||||||
|
-
|
||||||
|
+ long w; int i; char* p;
|
||||||
|
assert(sizeof(long) == sizeof(void*));
|
||||||
|
+#if defined(__powerpc64__)
|
||||||
|
+ fprintf (stderr, "powerpc64\n"); /* Used to select correct .exp file. */
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* partial load, which --partial-loads-ok=yes should suppress */
|
||||||
|
p = calloc( sizeof(long)-1, 1 );
|
||||||
|
@@ -16,7 +16,7 @@ int main ( void )
|
||||||
|
w = *(long*)p;
|
||||||
|
free(p);
|
||||||
|
|
||||||
|
- /* partial but misaligned, cannot be suppressed */
|
||||||
|
+ /* partial but misaligned, ppc64[le] ok, but otherwise cannot be suppressed */
|
||||||
|
p = calloc( sizeof(long), 1 );
|
||||||
|
assert(p);
|
||||||
|
p++;
|
||||||
|
diff --git a/memcheck/tests/partial_load_dflt.stderr.exp-ppc64 b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..cf32bcf
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+
|
||||||
|
+powerpc64
|
||||||
|
+Invalid read of size 2
|
||||||
|
+ at 0x........: main (partial_load.c:30)
|
||||||
|
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
|
||||||
|
+ at 0x........: calloc (vg_replace_malloc.c:...)
|
||||||
|
+ by 0x........: main (partial_load.c:28)
|
||||||
|
+
|
||||||
|
+Invalid read of size 8
|
||||||
|
+ at 0x........: main (partial_load.c:37)
|
||||||
|
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
|
||||||
|
+ at 0x........: free (vg_replace_malloc.c:...)
|
||||||
|
+ by 0x........: main (partial_load.c:36)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+HEAP SUMMARY:
|
||||||
|
+ in use at exit: ... bytes in ... blocks
|
||||||
|
+ total heap usage: ... allocs, ... frees, ... bytes allocated
|
||||||
|
+
|
||||||
|
+For a detailed leak analysis, rerun with: --leak-check=full
|
||||||
|
+
|
||||||
|
+For counts of detected and suppressed errors, rerun with: -v
|
||||||
|
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
|
||||||
|
diff --git a/memcheck/tests/partial_load_ok.stderr.exp-ppc64 b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..cf32bcf
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+
|
||||||
|
+powerpc64
|
||||||
|
+Invalid read of size 2
|
||||||
|
+ at 0x........: main (partial_load.c:30)
|
||||||
|
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
|
||||||
|
+ at 0x........: calloc (vg_replace_malloc.c:...)
|
||||||
|
+ by 0x........: main (partial_load.c:28)
|
||||||
|
+
|
||||||
|
+Invalid read of size 8
|
||||||
|
+ at 0x........: main (partial_load.c:37)
|
||||||
|
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
|
||||||
|
+ at 0x........: free (vg_replace_malloc.c:...)
|
||||||
|
+ by 0x........: main (partial_load.c:36)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+HEAP SUMMARY:
|
||||||
|
+ in use at exit: ... bytes in ... blocks
|
||||||
|
+ total heap usage: ... allocs, ... frees, ... bytes allocated
|
||||||
|
+
|
||||||
|
+For a detailed leak analysis, rerun with: --leak-check=full
|
||||||
|
+
|
||||||
|
+For counts of detected and suppressed errors, rerun with: -v
|
||||||
|
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
|
||||||
|
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
|
||||||
|
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-12 23:17:07.525501080 +0100
|
||||||
|
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-12 23:18:13.404014757 +0100
|
||||||
|
@@ -1546,8 +1546,10 @@
|
||||||
|
partiallydefinedeq.stdout.exp \
|
||||||
|
partial_load_ok.vgtest partial_load_ok.stderr.exp \
|
||||||
|
partial_load_ok.stderr.exp64 \
|
||||||
|
+ partial_load_ok.stderr.exp-ppc64 \
|
||||||
|
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
|
||||||
|
partial_load_dflt.stderr.exp64 \
|
||||||
|
+ partial_load_dflt.stderr.exp-ppc64 \
|
||||||
|
partial_load_dflt.stderr.expr-s390x-mvc \
|
||||||
|
pdb-realloc.stderr.exp pdb-realloc.vgtest \
|
||||||
|
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \
|
84
SOURCES/valgrind-3.14.0-s390x-fix-reg-alloc-vr-vs-fpr.patch
Normal file
84
SOURCES/valgrind-3.14.0-s390x-fix-reg-alloc-vr-vs-fpr.patch
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
commit 71002d8a5111d02ce8049c55017a8d948c820e35
|
||||||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||||||
|
Date: Thu Oct 25 13:47:12 2018 +0200
|
||||||
|
|
||||||
|
Bug 400490 s390x: Fix register allocation for VRs vs FPRs
|
||||||
|
|
||||||
|
On s390x, if vector registers are available, they are fed to the register
|
||||||
|
allocator as if they were separate from the floating-point registers. But
|
||||||
|
in fact the FPRs are embedded in the VRs. So for instance, if both f3 and
|
||||||
|
v3 are allocated and used at the same time, corruption will result.
|
||||||
|
|
||||||
|
This is fixed by offering only the non-overlapping VRs, v16 to v31, to the
|
||||||
|
register allocator instead.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
|
||||||
|
index 6c22ac8..98ac938 100644
|
||||||
|
--- a/VEX/priv/host_s390_defs.c
|
||||||
|
+++ b/VEX/priv/host_s390_defs.c
|
||||||
|
@@ -59,7 +59,6 @@ static UInt s390_tchain_load64_len(void);
|
||||||
|
|
||||||
|
/* A mapping from register number to register index */
|
||||||
|
static Int gpr_index[16]; // GPR regno -> register index
|
||||||
|
-static Int fpr_index[16]; // FPR regno -> register index
|
||||||
|
static Int vr_index[32]; // VR regno -> register index
|
||||||
|
|
||||||
|
HReg
|
||||||
|
@@ -73,7 +72,7 @@ s390_hreg_gpr(UInt regno)
|
||||||
|
HReg
|
||||||
|
s390_hreg_fpr(UInt regno)
|
||||||
|
{
|
||||||
|
- Int ix = fpr_index[regno];
|
||||||
|
+ Int ix = vr_index[regno];
|
||||||
|
vassert(ix >= 0);
|
||||||
|
return mkHReg(/*virtual*/False, HRcFlt64, regno, ix);
|
||||||
|
}
|
||||||
|
@@ -463,11 +462,9 @@ getRRegUniverse_S390(void)
|
||||||
|
|
||||||
|
RRegUniverse__init(ru);
|
||||||
|
|
||||||
|
- /* Assign invalid values to the gpr/fpr/vr_index */
|
||||||
|
+ /* Assign invalid values to the gpr/vr_index */
|
||||||
|
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
|
||||||
|
gpr_index[i] = -1;
|
||||||
|
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
|
||||||
|
- fpr_index[i] = -1;
|
||||||
|
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
|
||||||
|
vr_index[i] = -1;
|
||||||
|
|
||||||
|
@@ -494,17 +491,17 @@ getRRegUniverse_S390(void)
|
||||||
|
|
||||||
|
ru->allocable_start[HRcFlt64] = ru->size;
|
||||||
|
for (UInt regno = 8; regno <= 15; ++regno) {
|
||||||
|
- fpr_index[regno] = ru->size;
|
||||||
|
+ vr_index[regno] = ru->size;
|
||||||
|
ru->regs[ru->size++] = s390_hreg_fpr(regno);
|
||||||
|
}
|
||||||
|
for (UInt regno = 0; regno <= 7; ++regno) {
|
||||||
|
- fpr_index[regno] = ru->size;
|
||||||
|
+ vr_index[regno] = ru->size;
|
||||||
|
ru->regs[ru->size++] = s390_hreg_fpr(regno);
|
||||||
|
}
|
||||||
|
ru->allocable_end[HRcFlt64] = ru->size - 1;
|
||||||
|
|
||||||
|
ru->allocable_start[HRcVec128] = ru->size;
|
||||||
|
- for (UInt regno = 0; regno <= 31; ++regno) {
|
||||||
|
+ for (UInt regno = 16; regno <= 31; ++regno) {
|
||||||
|
vr_index[regno] = ru->size;
|
||||||
|
ru->regs[ru->size++] = s390_hreg_vr(regno);
|
||||||
|
}
|
||||||
|
@@ -527,12 +524,12 @@ getRRegUniverse_S390(void)
|
||||||
|
/* Sanity checking */
|
||||||
|
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
|
||||||
|
vassert(gpr_index[i] >= 0);
|
||||||
|
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
|
||||||
|
- vassert(fpr_index[i] >= 0);
|
||||||
|
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
|
||||||
|
vassert(vr_index[i] >= 0);
|
||||||
|
|
||||||
|
initialised = True;
|
||||||
|
+
|
||||||
|
+ RRegUniverse__check_is_sane(ru);
|
||||||
|
return ru;
|
||||||
|
}
|
||||||
|
|
41
SOURCES/valgrind-3.14.0-s390x-sign-extend-lochi.patch
Normal file
41
SOURCES/valgrind-3.14.0-s390x-sign-extend-lochi.patch
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
commit 9545e9f96beda6e9f2205bdb3c3e96edaf8d9e2b
|
||||||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||||||
|
Date: Tue Oct 30 17:06:38 2018 +0100
|
||||||
|
|
||||||
|
Bug 400491 s390x: Sign-extend immediate operand of LOCHI and friends
|
||||||
|
|
||||||
|
The VEX implementation of each of the z/Architecture instructions LOCHI,
|
||||||
|
LOCHHI, and LOCGHI treats the immediate 16-bit operand as an unsigned
|
||||||
|
integer instead of a signed integer. This is fixed.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||||||
|
index 60b6081..9c4d79b 100644
|
||||||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||||||
|
@@ -16307,7 +16307,7 @@ static const HChar *
|
||||||
|
s390_irgen_LOCHHI(UChar r1, UChar m3, UShort i2, UChar unused)
|
||||||
|
{
|
||||||
|
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
|
||||||
|
- put_gpr_w0(r1, mkU32(i2));
|
||||||
|
+ put_gpr_w0(r1, mkU32((UInt)(Int)(Short)i2));
|
||||||
|
|
||||||
|
return "lochhi";
|
||||||
|
}
|
||||||
|
@@ -16316,7 +16316,7 @@ static const HChar *
|
||||||
|
s390_irgen_LOCHI(UChar r1, UChar m3, UShort i2, UChar unused)
|
||||||
|
{
|
||||||
|
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
|
||||||
|
- put_gpr_w1(r1, mkU32(i2));
|
||||||
|
+ put_gpr_w1(r1, mkU32((UInt)(Int)(Short)i2));
|
||||||
|
|
||||||
|
return "lochi";
|
||||||
|
}
|
||||||
|
@@ -16325,7 +16325,7 @@ static const HChar *
|
||||||
|
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
|
||||||
|
{
|
||||||
|
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
|
||||||
|
- put_gpr_dw0(r1, mkU64(i2));
|
||||||
|
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
|
||||||
|
|
||||||
|
return "locghi";
|
||||||
|
}
|
32
SOURCES/valgrind-3.14.0-s390x-vec-facility-bit.patch
Normal file
32
SOURCES/valgrind-3.14.0-s390x-vec-facility-bit.patch
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
commit 467c7c4c9665c0f8b41a4416722a027ebc05df2b
|
||||||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||||||
|
Date: Mon Jan 21 14:10:00 2019 +0100
|
||||||
|
|
||||||
|
Bug 403552 s390x: Fix vector facility bit number
|
||||||
|
|
||||||
|
The wrong bit number was used when checking for the vector facility. This
|
||||||
|
can result in a fatal emulation error: "Encountered an instruction that
|
||||||
|
requires the vector facility. That facility is not available on this
|
||||||
|
host."
|
||||||
|
|
||||||
|
In many cases the wrong facility bit was usually set as well, hence
|
||||||
|
nothing bad happened. But when running Valgrind within a Qemu/KVM guest,
|
||||||
|
the wrong bit was not (always?) set and the emulation error occurred.
|
||||||
|
|
||||||
|
This fix simply corrects the vector facility bit number, changing it from
|
||||||
|
128 to 129.
|
||||||
|
|
||||||
|
|
||||||
|
diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h
|
||||||
|
index a8a66b96b..8723ee21d 100644
|
||||||
|
--- a/VEX/pub/libvex_s390x_common.h
|
||||||
|
+++ b/VEX/pub/libvex_s390x_common.h
|
||||||
|
@@ -103,7 +103,7 @@
|
||||||
|
#define S390_FAC_MSA5 57 // message-security-assist 5
|
||||||
|
#define S390_FAC_TREXE 73 // transactional execution
|
||||||
|
#define S390_FAC_MSA4 77 // message-security-assist 4
|
||||||
|
-#define S390_FAC_VX 128 // vector facility
|
||||||
|
+#define S390_FAC_VX 129 // vector facility
|
||||||
|
|
||||||
|
|
||||||
|
/*--------------------------------------------------------------*/
|
1618
SOURCES/valgrind-3.14.0-s390x-vec-float-point-code.patch
Normal file
1618
SOURCES/valgrind-3.14.0-s390x-vec-float-point-code.patch
Normal file
File diff suppressed because it is too large
Load Diff
2420
SOURCES/valgrind-3.14.0-s390x-vec-float-point-tests.patch
Normal file
2420
SOURCES/valgrind-3.14.0-s390x-vec-float-point-tests.patch
Normal file
File diff suppressed because it is too large
Load Diff
408
SOURCES/valgrind-3.14.0-s390x-vec-reg-vgdb.patch
Normal file
408
SOURCES/valgrind-3.14.0-s390x-vec-reg-vgdb.patch
Normal file
@ -0,0 +1,408 @@
|
|||||||
|
commit 50bd2282bce101012a5668b670cb185375600d2d
|
||||||
|
Author: Andreas Arnez <arnez@linux.ibm.com>
|
||||||
|
Date: Thu Oct 18 17:51:57 2018 +0200
|
||||||
|
|
||||||
|
Bug 397187 s390x: Add vector register support for vgdb
|
||||||
|
|
||||||
|
On s390x machines with a vector facility, Valgrind's gdbserver didn't
|
||||||
|
represent the vector registers. This is fixed.
|
||||||
|
|
||||||
|
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
|
||||||
|
index 8de1996..94030fd 100644
|
||||||
|
--- a/coregrind/Makefile.am
|
||||||
|
+++ b/coregrind/Makefile.am
|
||||||
|
@@ -685,6 +685,11 @@ GDBSERVER_XML_FILES = \
|
||||||
|
m_gdbserver/s390x-linux64-valgrind-s1.xml \
|
||||||
|
m_gdbserver/s390x-linux64-valgrind-s2.xml \
|
||||||
|
m_gdbserver/s390x-linux64.xml \
|
||||||
|
+ m_gdbserver/s390-vx-valgrind-s1.xml \
|
||||||
|
+ m_gdbserver/s390-vx-valgrind-s2.xml \
|
||||||
|
+ m_gdbserver/s390-vx.xml \
|
||||||
|
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
|
||||||
|
+ m_gdbserver/s390x-vx-linux.xml \
|
||||||
|
m_gdbserver/mips-cp0-valgrind-s1.xml \
|
||||||
|
m_gdbserver/mips-cp0-valgrind-s2.xml \
|
||||||
|
m_gdbserver/mips-cp0.xml \
|
||||||
|
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..ca461b3
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
|
||||||
|
@@ -0,0 +1,43 @@
|
||||||
|
+<?xml version="1.0"?>
|
||||||
|
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ Copying and distribution of this file, with or without modification,
|
||||||
|
+ are permitted in any medium without royalty provided the copyright
|
||||||
|
+ notice and this notice are preserved. -->
|
||||||
|
+
|
||||||
|
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
|
||||||
|
+<feature name="org.gnu.gdb.s390.vx-valgrind-s1">
|
||||||
|
+ <reg name="v0ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v1ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v2ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v3ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v4ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v5ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v6ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v7ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v8ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v9ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v10ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v11ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v12ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v13ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v14ls1" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v15ls1" bitsize="64" type="uint64"/>
|
||||||
|
+
|
||||||
|
+ <reg name="v16s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v17s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v18s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v19s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v20s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v21s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v22s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v23s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v24s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v25s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v26s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v27s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v28s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v29s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v30s1" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v31s1" bitsize="128" type="uint128"/>
|
||||||
|
+</feature>
|
||||||
|
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..eccbd8d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
|
||||||
|
@@ -0,0 +1,43 @@
|
||||||
|
+<?xml version="1.0"?>
|
||||||
|
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ Copying and distribution of this file, with or without modification,
|
||||||
|
+ are permitted in any medium without royalty provided the copyright
|
||||||
|
+ notice and this notice are preserved. -->
|
||||||
|
+
|
||||||
|
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
|
||||||
|
+<feature name="org.gnu.gdb.s390.vx-valgrind-s2">
|
||||||
|
+ <reg name="v0ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v1ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v2ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v3ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v4ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v5ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v6ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v7ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v8ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v9ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v10ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v11ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v12ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v13ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v14ls2" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v15ls2" bitsize="64" type="uint64"/>
|
||||||
|
+
|
||||||
|
+ <reg name="v16s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v17s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v18s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v19s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v20s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v21s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v22s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v23s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v24s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v25s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v26s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v27s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v28s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v29s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v30s2" bitsize="128" type="uint128"/>
|
||||||
|
+ <reg name="v31s2" bitsize="128" type="uint128"/>
|
||||||
|
+</feature>
|
||||||
|
diff --git a/coregrind/m_gdbserver/s390-vx.xml b/coregrind/m_gdbserver/s390-vx.xml
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..2a16873
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/coregrind/m_gdbserver/s390-vx.xml
|
||||||
|
@@ -0,0 +1,59 @@
|
||||||
|
+<?xml version="1.0"?>
|
||||||
|
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ Copying and distribution of this file, with or without modification,
|
||||||
|
+ are permitted in any medium without royalty provided the copyright
|
||||||
|
+ notice and this notice are preserved. -->
|
||||||
|
+
|
||||||
|
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
|
||||||
|
+<feature name="org.gnu.gdb.s390.vx">
|
||||||
|
+ <vector id="v4f" type="ieee_single" count="4"/>
|
||||||
|
+ <vector id="v2d" type="ieee_double" count="2"/>
|
||||||
|
+ <vector id="v16i8" type="int8" count="16"/>
|
||||||
|
+ <vector id="v8i16" type="int16" count="8"/>
|
||||||
|
+ <vector id="v4i32" type="int32" count="4"/>
|
||||||
|
+ <vector id="v2i64" type="int64" count="2"/>
|
||||||
|
+ <union id="vec128">
|
||||||
|
+ <field name="v4_float" type="v4f"/>
|
||||||
|
+ <field name="v2_double" type="v2d"/>
|
||||||
|
+ <field name="v16_int8" type="v16i8"/>
|
||||||
|
+ <field name="v8_int16" type="v8i16"/>
|
||||||
|
+ <field name="v4_int32" type="v4i32"/>
|
||||||
|
+ <field name="v2_int64" type="v2i64"/>
|
||||||
|
+ <field name="uint128" type="uint128"/>
|
||||||
|
+ </union>
|
||||||
|
+
|
||||||
|
+ <reg name="v0l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v1l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v2l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v3l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v4l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v5l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v6l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v7l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v8l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v9l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v10l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v11l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v12l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v13l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v14l" bitsize="64" type="uint64"/>
|
||||||
|
+ <reg name="v15l" bitsize="64" type="uint64"/>
|
||||||
|
+
|
||||||
|
+ <reg name="v16" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v17" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v18" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v19" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v20" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v21" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v22" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v23" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v24" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v25" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v26" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v27" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v28" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v29" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v30" bitsize="128" type="vec128"/>
|
||||||
|
+ <reg name="v31" bitsize="128" type="vec128"/>
|
||||||
|
+</feature>
|
||||||
|
diff --git a/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..0237002
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+<?xml version="1.0"?>
|
||||||
|
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ Copying and distribution of this file, with or without modification,
|
||||||
|
+ are permitted in any medium without royalty provided the copyright
|
||||||
|
+ notice and this notice are preserved. -->
|
||||||
|
+
|
||||||
|
+<!-- S/390 64-bit user-level code. -->
|
||||||
|
+
|
||||||
|
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
|
||||||
|
+<target>
|
||||||
|
+ <architecture>s390:64-bit</architecture>
|
||||||
|
+ <xi:include href="s390x-core64.xml"/>
|
||||||
|
+ <xi:include href="s390-acr.xml"/>
|
||||||
|
+ <xi:include href="s390-fpr.xml"/>
|
||||||
|
+ <xi:include href="s390x-linux64.xml"/>
|
||||||
|
+ <xi:include href="s390-vx.xml"/>
|
||||||
|
+ <xi:include href="s390x-core64-valgrind-s1.xml"/>
|
||||||
|
+ <xi:include href="s390-acr-valgrind-s1.xml"/>
|
||||||
|
+ <xi:include href="s390-fpr-valgrind-s1.xml"/>
|
||||||
|
+ <xi:include href="s390x-linux64-valgrind-s1.xml"/>
|
||||||
|
+ <xi:include href="s390-vx-valgrind-s1.xml"/>
|
||||||
|
+ <xi:include href="s390x-core64-valgrind-s2.xml"/>
|
||||||
|
+ <xi:include href="s390-acr-valgrind-s2.xml"/>
|
||||||
|
+ <xi:include href="s390-fpr-valgrind-s2.xml"/>
|
||||||
|
+ <xi:include href="s390x-linux64-valgrind-s2.xml"/>
|
||||||
|
+ <xi:include href="s390-vx-valgrind-s2.xml"/>
|
||||||
|
+</target>
|
||||||
|
diff --git a/coregrind/m_gdbserver/s390x-vx-linux.xml b/coregrind/m_gdbserver/s390x-vx-linux.xml
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..e431c5b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/coregrind/m_gdbserver/s390x-vx-linux.xml
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+<?xml version="1.0"?>
|
||||||
|
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ Copying and distribution of this file, with or without modification,
|
||||||
|
+ are permitted in any medium without royalty provided the copyright
|
||||||
|
+ notice and this notice are preserved. -->
|
||||||
|
+
|
||||||
|
+<!-- S/390 64-bit user-level code. -->
|
||||||
|
+
|
||||||
|
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
|
||||||
|
+<target>
|
||||||
|
+ <architecture>s390:64-bit</architecture>
|
||||||
|
+ <xi:include href="s390x-core64.xml"/>
|
||||||
|
+ <xi:include href="s390-acr.xml"/>
|
||||||
|
+ <xi:include href="s390-fpr.xml"/>
|
||||||
|
+ <xi:include href="s390x-linux64.xml"/>
|
||||||
|
+ <xi:include href="s390-vx.xml"/>
|
||||||
|
+</target>
|
||||||
|
diff --git a/coregrind/m_gdbserver/valgrind-low-s390x.c b/coregrind/m_gdbserver/valgrind-low-s390x.c
|
||||||
|
index 7bbb2e3..a667f4b 100644
|
||||||
|
--- a/coregrind/m_gdbserver/valgrind-low-s390x.c
|
||||||
|
+++ b/coregrind/m_gdbserver/valgrind-low-s390x.c
|
||||||
|
@@ -88,9 +88,42 @@ static struct reg regs[] = {
|
||||||
|
{ "f14", 2592, 64 },
|
||||||
|
{ "f15", 2656, 64 },
|
||||||
|
{ "orig_r2", 2720, 64 },
|
||||||
|
+ { "v0l", 2784, 64 },
|
||||||
|
+ { "v1l", 2848, 64 },
|
||||||
|
+ { "v2l", 2912, 64 },
|
||||||
|
+ { "v3l", 2976, 64 },
|
||||||
|
+ { "v4l", 3040, 64 },
|
||||||
|
+ { "v5l", 3104, 64 },
|
||||||
|
+ { "v6l", 3168, 64 },
|
||||||
|
+ { "v7l", 3232, 64 },
|
||||||
|
+ { "v8l", 3296, 64 },
|
||||||
|
+ { "v9l", 3360, 64 },
|
||||||
|
+ { "v10l", 3424, 64 },
|
||||||
|
+ { "v11l", 3488, 64 },
|
||||||
|
+ { "v12l", 3552, 64 },
|
||||||
|
+ { "v13l", 3616, 64 },
|
||||||
|
+ { "v14l", 3680, 64 },
|
||||||
|
+ { "v15l", 3744, 64 },
|
||||||
|
+ { "v16", 3808, 128 },
|
||||||
|
+ { "v17", 3936, 128 },
|
||||||
|
+ { "v18", 4064, 128 },
|
||||||
|
+ { "v19", 4192, 128 },
|
||||||
|
+ { "v20", 4320, 128 },
|
||||||
|
+ { "v21", 4448, 128 },
|
||||||
|
+ { "v22", 4576, 128 },
|
||||||
|
+ { "v23", 4704, 128 },
|
||||||
|
+ { "v24", 4832, 128 },
|
||||||
|
+ { "v25", 4960, 128 },
|
||||||
|
+ { "v26", 5088, 128 },
|
||||||
|
+ { "v27", 5216, 128 },
|
||||||
|
+ { "v28", 5344, 128 },
|
||||||
|
+ { "v29", 5472, 128 },
|
||||||
|
+ { "v30", 5600, 128 },
|
||||||
|
+ { "v31", 5728, 128 },
|
||||||
|
};
|
||||||
|
static const char *expedite_regs[] = { "r14", "r15", "pswa", 0 };
|
||||||
|
-#define num_regs (sizeof (regs) / sizeof (regs[0]))
|
||||||
|
+#define num_regs_all (sizeof (regs) / sizeof (regs[0]))
|
||||||
|
+static int num_regs;
|
||||||
|
|
||||||
|
static
|
||||||
|
CORE_ADDR get_pc (void)
|
||||||
|
@@ -165,7 +198,7 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
|
||||||
|
case 32: VG_(transfer) (&s390x->guest_a14, buf, dir, size, mod); break;
|
||||||
|
case 33: VG_(transfer) (&s390x->guest_a15, buf, dir, size, mod); break;
|
||||||
|
case 34: VG_(transfer) (&s390x->guest_fpc, buf, dir, size, mod); break;
|
||||||
|
- case 35: VG_(transfer) (&s390x->guest_v0, buf, dir, size, mod); break;
|
||||||
|
+ case 35: VG_(transfer) (&s390x->guest_v0.w64[0], buf, dir, size, mod); break;
|
||||||
|
case 36: VG_(transfer) (&s390x->guest_v1.w64[0], buf, dir, size, mod); break;
|
||||||
|
case 37: VG_(transfer) (&s390x->guest_v2.w64[0], buf, dir, size, mod); break;
|
||||||
|
case 38: VG_(transfer) (&s390x->guest_v3.w64[0], buf, dir, size, mod); break;
|
||||||
|
@@ -182,18 +215,65 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
|
||||||
|
case 49: VG_(transfer) (&s390x->guest_v14.w64[0], buf, dir, size, mod); break;
|
||||||
|
case 50: VG_(transfer) (&s390x->guest_v15.w64[0], buf, dir, size, mod); break;
|
||||||
|
case 51: *mod = False; break; //GDBTD??? { "orig_r2", 0, 64 },
|
||||||
|
+ case 52: VG_(transfer) (&s390x->guest_v0.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 53: VG_(transfer) (&s390x->guest_v1.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 54: VG_(transfer) (&s390x->guest_v2.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 55: VG_(transfer) (&s390x->guest_v3.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 56: VG_(transfer) (&s390x->guest_v4.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 57: VG_(transfer) (&s390x->guest_v5.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 58: VG_(transfer) (&s390x->guest_v6.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 59: VG_(transfer) (&s390x->guest_v7.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 60: VG_(transfer) (&s390x->guest_v8.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 61: VG_(transfer) (&s390x->guest_v9.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 62: VG_(transfer) (&s390x->guest_v10.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 63: VG_(transfer) (&s390x->guest_v11.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 64: VG_(transfer) (&s390x->guest_v12.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 65: VG_(transfer) (&s390x->guest_v13.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 66: VG_(transfer) (&s390x->guest_v14.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 67: VG_(transfer) (&s390x->guest_v15.w64[1], buf, dir, size, mod); break;
|
||||||
|
+ case 68: VG_(transfer) (&s390x->guest_v16, buf, dir, size, mod); break;
|
||||||
|
+ case 69: VG_(transfer) (&s390x->guest_v17, buf, dir, size, mod); break;
|
||||||
|
+ case 70: VG_(transfer) (&s390x->guest_v18, buf, dir, size, mod); break;
|
||||||
|
+ case 71: VG_(transfer) (&s390x->guest_v19, buf, dir, size, mod); break;
|
||||||
|
+ case 72: VG_(transfer) (&s390x->guest_v20, buf, dir, size, mod); break;
|
||||||
|
+ case 73: VG_(transfer) (&s390x->guest_v21, buf, dir, size, mod); break;
|
||||||
|
+ case 74: VG_(transfer) (&s390x->guest_v22, buf, dir, size, mod); break;
|
||||||
|
+ case 75: VG_(transfer) (&s390x->guest_v23, buf, dir, size, mod); break;
|
||||||
|
+ case 76: VG_(transfer) (&s390x->guest_v24, buf, dir, size, mod); break;
|
||||||
|
+ case 77: VG_(transfer) (&s390x->guest_v25, buf, dir, size, mod); break;
|
||||||
|
+ case 78: VG_(transfer) (&s390x->guest_v26, buf, dir, size, mod); break;
|
||||||
|
+ case 79: VG_(transfer) (&s390x->guest_v27, buf, dir, size, mod); break;
|
||||||
|
+ case 80: VG_(transfer) (&s390x->guest_v28, buf, dir, size, mod); break;
|
||||||
|
+ case 81: VG_(transfer) (&s390x->guest_v29, buf, dir, size, mod); break;
|
||||||
|
+ case 82: VG_(transfer) (&s390x->guest_v30, buf, dir, size, mod); break;
|
||||||
|
+ case 83: VG_(transfer) (&s390x->guest_v31, buf, dir, size, mod); break;
|
||||||
|
default: vg_assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
+Bool have_vx (void)
|
||||||
|
+{
|
||||||
|
+ VexArch va;
|
||||||
|
+ VexArchInfo vai;
|
||||||
|
+ VG_(machine_get_VexArchInfo) (&va, &vai);
|
||||||
|
+ return (vai.hwcaps & VEX_HWCAPS_S390X_VX) != 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static
|
||||||
|
const char* target_xml (Bool shadow_mode)
|
||||||
|
{
|
||||||
|
if (shadow_mode) {
|
||||||
|
- return "s390x-generic-valgrind.xml";
|
||||||
|
+ if (have_vx())
|
||||||
|
+ return "s390x-vx-linux-valgrind.xml";
|
||||||
|
+ else
|
||||||
|
+ return "s390x-generic-valgrind.xml";
|
||||||
|
} else {
|
||||||
|
- return "s390x-generic.xml";
|
||||||
|
- }
|
||||||
|
+ if (have_vx())
|
||||||
|
+ return "s390x-vx-linux.xml";
|
||||||
|
+ else
|
||||||
|
+ return "s390x-generic.xml";
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
static CORE_ADDR** target_get_dtv (ThreadState *tst)
|
||||||
|
@@ -206,7 +286,7 @@ static CORE_ADDR** target_get_dtv (ThreadState *tst)
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct valgrind_target_ops low_target = {
|
||||||
|
- num_regs,
|
||||||
|
+ -1, // Override at init time.
|
||||||
|
regs,
|
||||||
|
17, //sp = r15, which is register offset 17 in regs
|
||||||
|
transfer_register,
|
||||||
|
@@ -220,6 +300,11 @@ static struct valgrind_target_ops low_target = {
|
||||||
|
void s390x_init_architecture (struct valgrind_target_ops *target)
|
||||||
|
{
|
||||||
|
*target = low_target;
|
||||||
|
+ if (have_vx())
|
||||||
|
+ num_regs = num_regs_all;
|
||||||
|
+ else
|
||||||
|
+ num_regs = num_regs_all - 32; // Remove all VX registers.
|
||||||
|
+ target->num_regs = num_regs;
|
||||||
|
set_register_cache (regs, num_regs);
|
||||||
|
gdbserver_expedite_regs = expedite_regs;
|
||||||
|
}
|
||||||
|
diff -ru valgrind-3.14.0.orig/coregrind/Makefile.in valgrind-3.14.0/coregrind/Makefile.in
|
||||||
|
--- valgrind-3.14.0.orig/coregrind/Makefile.in 2018-11-20 17:30:03.075888111 +0100
|
||||||
|
+++ valgrind-3.14.0/coregrind/Makefile.in 2018-11-20 17:31:14.999314275 +0100
|
||||||
|
@@ -1869,6 +1869,11 @@
|
||||||
|
m_gdbserver/s390x-linux64-valgrind-s1.xml \
|
||||||
|
m_gdbserver/s390x-linux64-valgrind-s2.xml \
|
||||||
|
m_gdbserver/s390x-linux64.xml \
|
||||||
|
+ m_gdbserver/s390-vx-valgrind-s1.xml \
|
||||||
|
+ m_gdbserver/s390-vx-valgrind-s2.xml \
|
||||||
|
+ m_gdbserver/s390-vx.xml \
|
||||||
|
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
|
||||||
|
+ m_gdbserver/s390x-vx-linux.xml \
|
||||||
|
m_gdbserver/mips-cp0-valgrind-s1.xml \
|
||||||
|
m_gdbserver/mips-cp0-valgrind-s2.xml \
|
||||||
|
m_gdbserver/mips-cp0.xml \
|
51
SOURCES/valgrind-3.14.0-s390z-more-z13-fixes.patch
Normal file
51
SOURCES/valgrind-3.14.0-s390z-more-z13-fixes.patch
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
From d10cd86ee32bf76495f79c02df62fc242adbcbe3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Andreas Arnez <arnez@linux.vnet.ibm.com>
|
||||||
|
Date: Thu, 26 Jul 2018 16:35:24 +0200
|
||||||
|
Subject: [PATCH] s390x: More fixes for z13 support
|
||||||
|
|
||||||
|
This patch addresses the following:
|
||||||
|
|
||||||
|
* Fix the implementation of LOCGHI. Previously Valgrind performed 32-bit
|
||||||
|
sign extension instead of 64-bit sign extension on the immediate value.
|
||||||
|
|
||||||
|
* Advertise VXRS in HWCAP. If no VXRS are advertised, but the program
|
||||||
|
uses vector registers, this could cause problems with a glibc built with
|
||||||
|
"-march=z13".
|
||||||
|
---
|
||||||
|
VEX/priv/guest_s390_toIR.c | 2 +-
|
||||||
|
coregrind/m_initimg/initimg-linux.c | 6 +++---
|
||||||
|
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
|
||||||
|
index 9c4d79b87..50a5a4177 100644
|
||||||
|
--- a/VEX/priv/guest_s390_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_s390_toIR.c
|
||||||
|
@@ -16325,7 +16325,7 @@ static const HChar *
|
||||||
|
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
|
||||||
|
{
|
||||||
|
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
|
||||||
|
- put_gpr_dw0(r1, mkU64((UInt)(Int)(Short)i2));
|
||||||
|
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
|
||||||
|
|
||||||
|
return "locghi";
|
||||||
|
}
|
||||||
|
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
|
||||||
|
index 61cc458bc..8a7f0d024 100644
|
||||||
|
--- a/coregrind/m_initimg/initimg-linux.c
|
||||||
|
+++ b/coregrind/m_initimg/initimg-linux.c
|
||||||
|
@@ -699,9 +699,9 @@ Addr setup_client_stack( void* init_sp,
|
||||||
|
}
|
||||||
|
# elif defined(VGP_s390x_linux)
|
||||||
|
{
|
||||||
|
- /* Advertise hardware features "below" TE only. TE and VXRS
|
||||||
|
- (and anything above) are not supported by Valgrind. */
|
||||||
|
- auxv->u.a_val &= VKI_HWCAP_S390_TE - 1;
|
||||||
|
+ /* Advertise hardware features "below" TE and VXRS. TE itself
|
||||||
|
+ and anything above VXRS is not supported by Valgrind. */
|
||||||
|
+ auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS;
|
||||||
|
}
|
||||||
|
# elif defined(VGP_arm64_linux)
|
||||||
|
{
|
||||||
|
--
|
||||||
|
2.17.0
|
||||||
|
|
145
SOURCES/valgrind-3.14.0-set_AV_CR6.patch
Normal file
145
SOURCES/valgrind-3.14.0-set_AV_CR6.patch
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
commit dc1523fb3550b4ed9dd4c178741626daaa474da7
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Mon Dec 10 17:18:20 2018 +0100
|
||||||
|
|
||||||
|
PR386945 set_AV_CR6 patch
|
||||||
|
|
||||||
|
https://bugs.kde.org/show_bug.cgi?id=386945#c62
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
index ec2f90a..c3cc6d0 100644
|
||||||
|
--- a/VEX/priv/guest_ppc_toIR.c
|
||||||
|
+++ b/VEX/priv/guest_ppc_toIR.c
|
||||||
|
@@ -2062,45 +2062,88 @@ static void set_CR0 ( IRExpr* result )
|
||||||
|
static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
|
||||||
|
{
|
||||||
|
/* CR6[0:3] = {all_ones, 0, all_zeros, 0}
|
||||||
|
- all_ones = (v[0] && v[1] && v[2] && v[3])
|
||||||
|
- all_zeros = ~(v[0] || v[1] || v[2] || v[3])
|
||||||
|
+ 32 bit: all_zeros = (v[0] || v[1] || v[2] || v[3]) == 0x0000'0000
|
||||||
|
+ all_ones = ~(v[0] && v[1] && v[2] && v[3]) == 0x0000'0000
|
||||||
|
+ where v[] denotes 32-bit lanes
|
||||||
|
+ or
|
||||||
|
+ 64 bit: all_zeros = (v[0] || v[1]) == 0x0000'0000'0000'0000
|
||||||
|
+ all_ones = ~(v[0] && v[1]) == 0x0000'0000'0000'0000
|
||||||
|
+ where v[] denotes 64-bit lanes
|
||||||
|
+
|
||||||
|
+ The 32- and 64-bit versions compute the same thing, but the 64-bit one
|
||||||
|
+ tries to be a bit more efficient.
|
||||||
|
*/
|
||||||
|
- IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v2 = newTemp(Ity_V128);
|
||||||
|
- IRTemp v3 = newTemp(Ity_V128);
|
||||||
|
- IRTemp rOnes = newTemp(Ity_I8);
|
||||||
|
- IRTemp rZeros = newTemp(Ity_I8);
|
||||||
|
-
|
||||||
|
vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
|
||||||
|
|
||||||
|
- assign( v0, result );
|
||||||
|
- assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
|
||||||
|
- assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
- assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
|
||||||
|
+ IRTemp overlappedOred = newTemp(Ity_V128);
|
||||||
|
+ IRTemp overlappedAnded = newTemp(Ity_V128);
|
||||||
|
+
|
||||||
|
+ if (mode64) {
|
||||||
|
+ IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
+ assign( v0, result );
|
||||||
|
+ assign( v1, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
+ assign(overlappedOred,
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)));
|
||||||
|
+ assign(overlappedAnded,
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)));
|
||||||
|
+ } else {
|
||||||
|
+ IRTemp v0 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v1 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v2 = newTemp(Ity_V128);
|
||||||
|
+ IRTemp v3 = newTemp(Ity_V128);
|
||||||
|
+ assign( v0, result );
|
||||||
|
+ assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
|
||||||
|
+ assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
|
||||||
|
+ assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
|
||||||
|
+ assign(overlappedOred,
|
||||||
|
+ binop(Iop_OrV128,
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
+ binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))));
|
||||||
|
+ assign(overlappedAnded,
|
||||||
|
+ binop(Iop_AndV128,
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ IRTemp rOnes = newTemp(Ity_I8);
|
||||||
|
+ IRTemp rZeroes = newTemp(Ity_I8);
|
||||||
|
|
||||||
|
- assign( rZeros, unop(Iop_1Uto8,
|
||||||
|
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
|
||||||
|
- unop(Iop_Not32,
|
||||||
|
- unop(Iop_V128to32,
|
||||||
|
- binop(Iop_OrV128,
|
||||||
|
- binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
- binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
|
||||||
|
- ))) );
|
||||||
|
+ if (mode64) {
|
||||||
|
+ assign(rZeroes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ64,
|
||||||
|
+ mkU64(0),
|
||||||
|
+ unop(Iop_V128to64, mkexpr(overlappedOred)))));
|
||||||
|
+ assign(rOnes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ64,
|
||||||
|
+ mkU64(0),
|
||||||
|
+ unop(Iop_Not64,
|
||||||
|
+ unop(Iop_V128to64, mkexpr(overlappedAnded))))));
|
||||||
|
+ } else {
|
||||||
|
+ assign(rZeroes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ32,
|
||||||
|
+ mkU32(0),
|
||||||
|
+ unop(Iop_V128to32, mkexpr(overlappedOred)))));
|
||||||
|
+ assign(rOnes,
|
||||||
|
+ unop(Iop_1Uto8,
|
||||||
|
+ binop(Iop_CmpEQ32,
|
||||||
|
+ mkU32(0),
|
||||||
|
+ unop(Iop_Not32,
|
||||||
|
+ unop(Iop_V128to32, mkexpr(overlappedAnded))))));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ // rOnes might not be used below. But iropt will remove it, so there's no
|
||||||
|
+ // inefficiency as a result.
|
||||||
|
|
||||||
|
if (test_all_ones) {
|
||||||
|
- assign( rOnes, unop(Iop_1Uto8,
|
||||||
|
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
|
||||||
|
- unop(Iop_V128to32,
|
||||||
|
- binop(Iop_AndV128,
|
||||||
|
- binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
|
||||||
|
- binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
|
||||||
|
- ))) );
|
||||||
|
putCR321( 6, binop(Iop_Or8,
|
||||||
|
binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
|
||||||
|
- binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
|
||||||
|
+ binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1))) );
|
||||||
|
} else {
|
||||||
|
- putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
|
||||||
|
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1)) );
|
||||||
|
}
|
||||||
|
putCR0( 6, mkU8(0) );
|
||||||
|
}
|
||||||
|
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
|
||||||
|
index c24db91..7f69ee3 100644
|
||||||
|
--- a/memcheck/mc_translate.c
|
||||||
|
+++ b/memcheck/mc_translate.c
|
||||||
|
@@ -8322,6 +8322,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure,
|
||||||
|
# elif defined(VGA_amd64)
|
||||||
|
mce.dlbo.dl_Add64 = DLauto;
|
||||||
|
mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
|
||||||
|
+# elif defined(VGA_ppc64le)
|
||||||
|
+ // Needed by (at least) set_AV_CR6() in the front end.
|
||||||
|
+ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
|
244
SOURCES/valgrind-3.14.0-sigkill.patch
Normal file
244
SOURCES/valgrind-3.14.0-sigkill.patch
Normal file
@ -0,0 +1,244 @@
|
|||||||
|
commit 0c701ba2a4b10a5f6f3fae31cb0ec6ca034d51d9
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Fri Dec 7 14:01:20 2018 +0100
|
||||||
|
|
||||||
|
Fix sigkill.stderr.exp for glibc-2.28.
|
||||||
|
|
||||||
|
glibc 2.28 filters out some bad signal numbers and returns
|
||||||
|
Invalid argument instead of passing such bad signal numbers
|
||||||
|
    to the kernel sigaction syscall. So we won't see such bad signal
|
||||||
|
numbers and won't print "bad signal number" ourselves.
|
||||||
|
|
||||||
|
Add a new memcheck/tests/sigkill.stderr.exp-glibc-2.28 to catch
|
||||||
|
this case.
|
||||||
|
|
||||||
|
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
|
||||||
|
index 76e0e90..2af4dd1 100644
|
||||||
|
--- a/memcheck/tests/Makefile.am
|
||||||
|
+++ b/memcheck/tests/Makefile.am
|
||||||
|
@@ -260,7 +260,8 @@ EXTRA_DIST = \
|
||||||
|
sh-mem-random.stdout.exp sh-mem-random.vgtest \
|
||||||
|
sigaltstack.stderr.exp sigaltstack.vgtest \
|
||||||
|
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
|
||||||
|
- sigkill.stderr.exp-solaris sigkill.vgtest \
|
||||||
|
+ sigkill.stderr.exp-solaris \
|
||||||
|
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
|
||||||
|
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
|
||||||
|
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
|
||||||
|
static_malloc.stderr.exp static_malloc.vgtest \
|
||||||
|
diff --git a/memcheck/tests/sigkill.stderr.exp-glibc-2.28 b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..0e5f0cb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
|
||||||
|
@@ -0,0 +1,197 @@
|
||||||
|
+
|
||||||
|
+setting signal 1: Success
|
||||||
|
+getting signal 1: Success
|
||||||
|
+
|
||||||
|
+setting signal 2: Success
|
||||||
|
+getting signal 2: Success
|
||||||
|
+
|
||||||
|
+setting signal 3: Success
|
||||||
|
+getting signal 3: Success
|
||||||
|
+
|
||||||
|
+setting signal 4: Success
|
||||||
|
+getting signal 4: Success
|
||||||
|
+
|
||||||
|
+setting signal 5: Success
|
||||||
|
+getting signal 5: Success
|
||||||
|
+
|
||||||
|
+setting signal 6: Success
|
||||||
|
+getting signal 6: Success
|
||||||
|
+
|
||||||
|
+setting signal 7: Success
|
||||||
|
+getting signal 7: Success
|
||||||
|
+
|
||||||
|
+setting signal 8: Success
|
||||||
|
+getting signal 8: Success
|
||||||
|
+
|
||||||
|
+setting signal 9: Warning: ignored attempt to set SIGKILL handler in sigaction();
|
||||||
|
+ the SIGKILL signal is uncatchable
|
||||||
|
+Invalid argument
|
||||||
|
+getting signal 9: Success
|
||||||
|
+
|
||||||
|
+setting signal 10: Success
|
||||||
|
+getting signal 10: Success
|
||||||
|
+
|
||||||
|
+setting signal 11: Success
|
||||||
|
+getting signal 11: Success
|
||||||
|
+
|
||||||
|
+setting signal 12: Success
|
||||||
|
+getting signal 12: Success
|
||||||
|
+
|
||||||
|
+setting signal 13: Success
|
||||||
|
+getting signal 13: Success
|
||||||
|
+
|
||||||
|
+setting signal 14: Success
|
||||||
|
+getting signal 14: Success
|
||||||
|
+
|
||||||
|
+setting signal 15: Success
|
||||||
|
+getting signal 15: Success
|
||||||
|
+
|
||||||
|
+setting signal 16: Success
|
||||||
|
+getting signal 16: Success
|
||||||
|
+
|
||||||
|
+setting signal 17: Success
|
||||||
|
+getting signal 17: Success
|
||||||
|
+
|
||||||
|
+setting signal 18: Success
|
||||||
|
+getting signal 18: Success
|
||||||
|
+
|
||||||
|
+setting signal 19: Warning: ignored attempt to set SIGSTOP handler in sigaction();
|
||||||
|
+ the SIGSTOP signal is uncatchable
|
||||||
|
+Invalid argument
|
||||||
|
+getting signal 19: Success
|
||||||
|
+
|
||||||
|
+setting signal 20: Success
|
||||||
|
+getting signal 20: Success
|
||||||
|
+
|
||||||
|
+setting signal 21: Success
|
||||||
|
+getting signal 21: Success
|
||||||
|
+
|
||||||
|
+setting signal 22: Success
|
||||||
|
+getting signal 22: Success
|
||||||
|
+
|
||||||
|
+setting signal 23: Success
|
||||||
|
+getting signal 23: Success
|
||||||
|
+
|
||||||
|
+setting signal 24: Success
|
||||||
|
+getting signal 24: Success
|
||||||
|
+
|
||||||
|
+setting signal 25: Success
|
||||||
|
+getting signal 25: Success
|
||||||
|
+
|
||||||
|
+setting signal 26: Success
|
||||||
|
+getting signal 26: Success
|
||||||
|
+
|
||||||
|
+setting signal 27: Success
|
||||||
|
+getting signal 27: Success
|
||||||
|
+
|
||||||
|
+setting signal 28: Success
|
||||||
|
+getting signal 28: Success
|
||||||
|
+
|
||||||
|
+setting signal 29: Success
|
||||||
|
+getting signal 29: Success
|
||||||
|
+
|
||||||
|
+setting signal 30: Success
|
||||||
|
+getting signal 30: Success
|
||||||
|
+
|
||||||
|
+setting signal 31: Success
|
||||||
|
+getting signal 31: Success
|
||||||
|
+
|
||||||
|
+setting signal 34: Success
|
||||||
|
+getting signal 34: Success
|
||||||
|
+
|
||||||
|
+setting signal 35: Success
|
||||||
|
+getting signal 35: Success
|
||||||
|
+
|
||||||
|
+setting signal 36: Success
|
||||||
|
+getting signal 36: Success
|
||||||
|
+
|
||||||
|
+setting signal 37: Success
|
||||||
|
+getting signal 37: Success
|
||||||
|
+
|
||||||
|
+setting signal 38: Success
|
||||||
|
+getting signal 38: Success
|
||||||
|
+
|
||||||
|
+setting signal 39: Success
|
||||||
|
+getting signal 39: Success
|
||||||
|
+
|
||||||
|
+setting signal 40: Success
|
||||||
|
+getting signal 40: Success
|
||||||
|
+
|
||||||
|
+setting signal 41: Success
|
||||||
|
+getting signal 41: Success
|
||||||
|
+
|
||||||
|
+setting signal 42: Success
|
||||||
|
+getting signal 42: Success
|
||||||
|
+
|
||||||
|
+setting signal 43: Success
|
||||||
|
+getting signal 43: Success
|
||||||
|
+
|
||||||
|
+setting signal 44: Success
|
||||||
|
+getting signal 44: Success
|
||||||
|
+
|
||||||
|
+setting signal 45: Success
|
||||||
|
+getting signal 45: Success
|
||||||
|
+
|
||||||
|
+setting signal 46: Success
|
||||||
|
+getting signal 46: Success
|
||||||
|
+
|
||||||
|
+setting signal 47: Success
|
||||||
|
+getting signal 47: Success
|
||||||
|
+
|
||||||
|
+setting signal 48: Success
|
||||||
|
+getting signal 48: Success
|
||||||
|
+
|
||||||
|
+setting signal 49: Success
|
||||||
|
+getting signal 49: Success
|
||||||
|
+
|
||||||
|
+setting signal 50: Success
|
||||||
|
+getting signal 50: Success
|
||||||
|
+
|
||||||
|
+setting signal 51: Success
|
||||||
|
+getting signal 51: Success
|
||||||
|
+
|
||||||
|
+setting signal 52: Success
|
||||||
|
+getting signal 52: Success
|
||||||
|
+
|
||||||
|
+setting signal 53: Success
|
||||||
|
+getting signal 53: Success
|
||||||
|
+
|
||||||
|
+setting signal 54: Success
|
||||||
|
+getting signal 54: Success
|
||||||
|
+
|
||||||
|
+setting signal 55: Success
|
||||||
|
+getting signal 55: Success
|
||||||
|
+
|
||||||
|
+setting signal 56: Success
|
||||||
|
+getting signal 56: Success
|
||||||
|
+
|
||||||
|
+setting signal 57: Success
|
||||||
|
+getting signal 57: Success
|
||||||
|
+
|
||||||
|
+setting signal 58: Success
|
||||||
|
+getting signal 58: Success
|
||||||
|
+
|
||||||
|
+setting signal 59: Success
|
||||||
|
+getting signal 59: Success
|
||||||
|
+
|
||||||
|
+setting signal 60: Success
|
||||||
|
+getting signal 60: Success
|
||||||
|
+
|
||||||
|
+setting signal 61: Success
|
||||||
|
+getting signal 61: Success
|
||||||
|
+
|
||||||
|
+setting signal 62: Success
|
||||||
|
+getting signal 62: Success
|
||||||
|
+
|
||||||
|
+setting signal 65: Invalid argument
|
||||||
|
+getting signal 65: Invalid argument
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+HEAP SUMMARY:
|
||||||
|
+ in use at exit: ... bytes in ... blocks
|
||||||
|
+ total heap usage: ... allocs, ... frees, ... bytes allocated
|
||||||
|
+
|
||||||
|
+For a detailed leak analysis, rerun with: --leak-check=full
|
||||||
|
+
|
||||||
|
+For counts of detected and suppressed errors, rerun with: -v
|
||||||
|
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
|
||||||
|
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
|
||||||
|
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-13 00:30:45.013839247 +0100
|
||||||
|
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-13 00:30:54.242636002 +0100
|
||||||
|
@@ -1573,7 +1573,8 @@
|
||||||
|
sh-mem-random.stdout.exp sh-mem-random.vgtest \
|
||||||
|
sigaltstack.stderr.exp sigaltstack.vgtest \
|
||||||
|
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
|
||||||
|
- sigkill.stderr.exp-solaris sigkill.vgtest \
|
||||||
|
+ sigkill.stderr.exp-solaris \
|
||||||
|
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
|
||||||
|
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
|
||||||
|
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
|
||||||
|
static_malloc.stderr.exp static_malloc.vgtest \
|
82
SOURCES/valgrind-3.14.0-transform-popcount64-ctznat64.patch
Normal file
82
SOURCES/valgrind-3.14.0-transform-popcount64-ctznat64.patch
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
commit cb5d7e047598bff6d0f1d707a70d9fb1a1c7f0e2
|
||||||
|
Author: Julian Seward <jseward@acm.org>
|
||||||
|
Date: Tue Nov 20 11:46:55 2018 +0100
|
||||||
|
|
||||||
|
VEX/priv/ir_opt.c
|
||||||
|
|
||||||
|
fold_Expr: transform PopCount64(And64(Add64(x,-1),Not64(x))) into CtzNat64(x).
|
||||||
|
|
||||||
|
This is part of the fix for bug 386945.
|
||||||
|
|
||||||
|
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
|
||||||
|
index f40870b..23964be 100644
|
||||||
|
--- a/VEX/priv/ir_opt.c
|
||||||
|
+++ b/VEX/priv/ir_opt.c
|
||||||
|
@@ -1377,6 +1377,8 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
|
||||||
|
case Iex_Unop:
|
||||||
|
/* UNARY ops */
|
||||||
|
if (e->Iex.Unop.arg->tag == Iex_Const) {
|
||||||
|
+
|
||||||
|
+ /* cases where the arg is a const */
|
||||||
|
switch (e->Iex.Unop.op) {
|
||||||
|
case Iop_1Uto8:
|
||||||
|
e2 = IRExpr_Const(IRConst_U8(toUChar(
|
||||||
|
@@ -1690,8 +1692,56 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
|
||||||
|
|
||||||
|
default:
|
||||||
|
goto unhandled;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
+ } // switch (e->Iex.Unop.op)
|
||||||
|
+
|
||||||
|
+ } else {
|
||||||
|
+
|
||||||
|
+ /* other cases (identities, etc) */
|
||||||
|
+ switch (e->Iex.Unop.op) {
|
||||||
|
+ case Iop_PopCount64: {
|
||||||
|
+ // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
|
||||||
|
+ // bindings:
|
||||||
|
+ // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
|
||||||
|
+ IRExpr* a1 = chase(env, e->Iex.Unop.arg);
|
||||||
|
+ if (!a1)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ if (a1->tag != Iex_Binop || a1->Iex.Binop.op != Iop_And64)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // a1 is established
|
||||||
|
+ IRExpr* a11 = chase(env, a1->Iex.Binop.arg1);
|
||||||
|
+ if (!a11)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ if (a11->tag != Iex_Binop || a11->Iex.Binop.op != Iop_Add64)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // a11 is established
|
||||||
|
+ IRExpr* a12 = chase(env, a1->Iex.Binop.arg2);
|
||||||
|
+ if (!a12)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ if (a12->tag != Iex_Unop || a12->Iex.Unop.op != Iop_Not64)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // a12 is established
|
||||||
|
+ IRExpr* a111 = a11->Iex.Binop.arg1;
|
||||||
|
+ IRExpr* a112 = chase(env, a11->Iex.Binop.arg2);
|
||||||
|
+ IRExpr* a121 = a12->Iex.Unop.arg;
|
||||||
|
+ if (!a111 || !a112 || !a121)
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // a111 and a121 need to be the same temp.
|
||||||
|
+ if (!eqIRAtom(a111, a121))
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // Finally, a112 must be a 64-bit version of -1.
|
||||||
|
+ if (!isOnesU(a112))
|
||||||
|
+ goto nomatch;
|
||||||
|
+ // Match established. Transform.
|
||||||
|
+ e2 = IRExpr_Unop(Iop_CtzNat64, a111);
|
||||||
|
+ break;
|
||||||
|
+ nomatch:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ } // switch (e->Iex.Unop.op)
|
||||||
|
+
|
||||||
|
+ } // if (e->Iex.Unop.arg->tag == Iex_Const)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Iex_Binop:
|
98
SOURCES/valgrind-3.14.0-undef_malloc_args.patch
Normal file
98
SOURCES/valgrind-3.14.0-undef_malloc_args.patch
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
commit 262275da43425ba2b8c240e47063e36b39167996
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Wed Dec 12 13:55:01 2018 +0100
|
||||||
|
|
||||||
|
Fix memcheck/tests/undef_malloc_args testcase.
|
||||||
|
|
||||||
|
diff --git a/coregrind/m_replacemalloc/vg_replace_malloc.c b/coregrind/m_replacemalloc/vg_replace_malloc.c
|
||||||
|
index 28bdb4a..564829a 100644
|
||||||
|
--- a/coregrind/m_replacemalloc/vg_replace_malloc.c
|
||||||
|
+++ b/coregrind/m_replacemalloc/vg_replace_malloc.c
|
||||||
|
@@ -216,9 +216,19 @@ static void init(void);
|
||||||
|
Apart of allowing memcheck to detect an error, the macro
|
||||||
|
TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED has no effect and
|
||||||
|
has a minimal cost for other tools replacing malloc functions.
|
||||||
|
+
|
||||||
|
+ Creating an "artificial" use of _x that works reliably is not entirely
|
||||||
|
+ straightforward. Simply comparing it against zero often produces no
|
||||||
|
+   warning if _x contains at least one nonzero bit that is defined, because
|
||||||
|
+ Memcheck knows that the result of the comparison will be defined (cf
|
||||||
|
+ expensiveCmpEQorNE).
|
||||||
|
+
|
||||||
|
+ Really we want to PCast _x, so as to create a value which is entirely
|
||||||
|
+ undefined if any bit of _x is undefined. But there's no portable way to do
|
||||||
|
+ that.
|
||||||
|
*/
|
||||||
|
-#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(x) \
|
||||||
|
- if ((ULong)x == 0) __asm__ __volatile__( "" ::: "memory" )
|
||||||
|
+#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(_x) \
|
||||||
|
+ if ((UWord)(_x) == 0) __asm__ __volatile__( "" ::: "memory" )
|
||||||
|
|
||||||
|
/*---------------------- malloc ----------------------*/
|
||||||
|
|
||||||
|
@@ -504,7 +514,7 @@ static void init(void);
|
||||||
|
void VG_REPLACE_FUNCTION_EZU(10040,soname,fnname) (void *zone, void *p) \
|
||||||
|
{ \
|
||||||
|
DO_INIT; \
|
||||||
|
- TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord) zone); \
|
||||||
|
+ TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord)zone ^ (UWord)p); \
|
||||||
|
MALLOC_TRACE(#fnname "(%p, %p)\n", zone, p ); \
|
||||||
|
if (p == NULL) \
|
||||||
|
return; \
|
||||||
|
diff --git a/memcheck/tests/undef_malloc_args.c b/memcheck/tests/undef_malloc_args.c
|
||||||
|
index 99e2799..654d70d 100644
|
||||||
|
--- a/memcheck/tests/undef_malloc_args.c
|
||||||
|
+++ b/memcheck/tests/undef_malloc_args.c
|
||||||
|
@@ -11,29 +11,29 @@ int main (int argc, char*argv[])
|
||||||
|
|
||||||
|
{
|
||||||
|
size_t size = def_size;
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
|
||||||
|
p = malloc(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, sizeof(p));
|
||||||
|
new_p = realloc(p, def_size);
|
||||||
|
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
|
||||||
|
new_p = realloc(new_p, def_size);
|
||||||
|
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
|
||||||
|
free (new_p);
|
||||||
|
|
||||||
|
{
|
||||||
|
size_t nmemb = 1;
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, sizeof(nmemb));
|
||||||
|
new_p = calloc(nmemb, def_size);
|
||||||
|
free (new_p);
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
size_t alignment = 1;
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, sizeof(alignment));
|
||||||
|
new_p = memalign(alignment, def_size);
|
||||||
|
free(new_p);
|
||||||
|
}
|
||||||
|
@@ -41,14 +41,14 @@ int main (int argc, char*argv[])
|
||||||
|
{
|
||||||
|
size_t nmemb = 16;
|
||||||
|
size_t size = def_size;
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
|
||||||
|
new_p = memalign(nmemb, size);
|
||||||
|
free(new_p);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
size_t size = def_size;
|
||||||
|
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
|
||||||
|
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
|
||||||
|
new_p = valloc(size);
|
||||||
|
free (new_p);
|
||||||
|
}
|
89
SOURCES/valgrind-3.14.0-wcsncmp.patch
Normal file
89
SOURCES/valgrind-3.14.0-wcsncmp.patch
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
commit 5fdabb72fdcba6bcf788eaa19c1ee557c13b8a7a
|
||||||
|
Author: Mark Wielaard <mark@klomp.org>
|
||||||
|
Date: Sat Dec 1 23:54:40 2018 +0100
|
||||||
|
|
||||||
|
Bug 401627 - Add wcsncmp override and testcase.
|
||||||
|
|
||||||
|
    glibc 2.28 added an avx2 optimized variant of wcsncmp which memcheck
|
||||||
|
    cannot prove correct. Add a simple override in vg_replace_strmem.c.
|
||||||
|
|
||||||
|
diff --git a/memcheck/tests/wcs.c b/memcheck/tests/wcs.c
|
||||||
|
index 15730ad..538304b 100644
|
||||||
|
--- a/memcheck/tests/wcs.c
|
||||||
|
+++ b/memcheck/tests/wcs.c
|
||||||
|
@@ -1,5 +1,6 @@
|
||||||
|
-// Uses various wchar_t * functions that have hand written SSE assembly
|
||||||
|
-// implementations in glibc. wcslen, wcscpy, wcscmp, wcsrchr, wcschr.
|
||||||
|
+// Uses various wchar_t * functions that have hand written SSE and/or AVX2
|
||||||
|
+// assembly implementations in glibc.
|
||||||
|
+// wcslen, wcscpy, wcscmp, wcsncmp, wcsrchr, wcschr.
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
@@ -18,6 +19,8 @@ int main(int argc, char **argv)
|
||||||
|
c = wcscpy (b, a);
|
||||||
|
|
||||||
|
fprintf (stderr, "wcscmp equal: %d\n", wcscmp (a, b)); // wcscmp equal: 0
|
||||||
|
+ fprintf (stderr,
|
||||||
|
+ "wcsncmp equal: %d\n", wcsncmp (a, b, l)); // wcsncmp equal: 0
|
||||||
|
|
||||||
|
d = wcsrchr (a, L'd');
|
||||||
|
e = wcschr (a, L'd');
|
||||||
|
diff --git a/memcheck/tests/wcs.stderr.exp b/memcheck/tests/wcs.stderr.exp
|
||||||
|
index 41d74c8..d5b5959 100644
|
||||||
|
--- a/memcheck/tests/wcs.stderr.exp
|
||||||
|
+++ b/memcheck/tests/wcs.stderr.exp
|
||||||
|
@@ -1,3 +1,4 @@
|
||||||
|
wcslen: 53
|
||||||
|
wcscmp equal: 0
|
||||||
|
+wcsncmp equal: 0
|
||||||
|
wcsrchr == wcschr: 1
|
||||||
|
diff --git a/shared/vg_replace_strmem.c b/shared/vg_replace_strmem.c
|
||||||
|
index d6927f0..89a7dcc 100644
|
||||||
|
--- a/shared/vg_replace_strmem.c
|
||||||
|
+++ b/shared/vg_replace_strmem.c
|
||||||
|
@@ -103,6 +103,7 @@
|
||||||
|
20420 STPNCPY
|
||||||
|
20430 WMEMCHR
|
||||||
|
20440 WCSNLEN
|
||||||
|
+   20450 WCSNCMP
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(VGO_solaris)
|
||||||
|
@@ -1927,6 +1928,36 @@ static inline void my_exit ( int x )
|
||||||
|
WCSCMP(VG_Z_LIBC_SONAME, wcscmp)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+/*---------------------- wcsncmp ----------------------*/
|
||||||
|
+
|
||||||
|
+// This is a wchar_t equivalent to strncmp. We don't
|
||||||
|
+// have wchar_t available here, but in the GNU C Library
|
||||||
|
+// wchar_t is always 32 bits wide and wcsncmp uses signed
|
||||||
|
+// comparison, not unsigned as in strncmp function.
|
||||||
|
+
|
||||||
|
+#define WCSNCMP(soname, fnname) \
|
||||||
|
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
|
||||||
|
+ ( const Int* s1, const Int* s2, SizeT nmax ); \
|
||||||
|
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
|
||||||
|
+ ( const Int* s1, const Int* s2, SizeT nmax ) \
|
||||||
|
+ { \
|
||||||
|
+ SizeT n = 0; \
|
||||||
|
+ while (True) { \
|
||||||
|
+ if (n >= nmax) return 0; \
|
||||||
|
+ if (*s1 == 0 && *s2 == 0) return 0; \
|
||||||
|
+ if (*s1 == 0) return -1; \
|
||||||
|
+ if (*s2 == 0) return 1; \
|
||||||
|
+ \
|
||||||
|
+ if (*s1 < *s2) return -1; \
|
||||||
|
+ if (*s1 > *s2) return 1; \
|
||||||
|
+ \
|
||||||
|
+ s1++; s2++; n++; \
|
||||||
|
+ } \
|
||||||
|
+ }
|
||||||
|
+#if defined(VGO_linux)
|
||||||
|
+ WCSNCMP(VG_Z_LIBC_SONAME, wcsncmp)
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/*---------------------- wcscpy ----------------------*/
|
||||||
|
|
||||||
|
// This is a wchar_t equivalent to strcpy. We don't
|
54
SOURCES/valgrind-3.9.0-cachegrind-improvements.patch
Normal file
54
SOURCES/valgrind-3.9.0-cachegrind-improvements.patch
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
--- valgrind-3.8.1/cachegrind/cg_sim.c.jj 2011-10-26 23:24:32.000000000 +0200
|
||||||
|
+++ valgrind-3.8.1/cachegrind/cg_sim.c 2011-12-09 17:31:19.256023683 +0100
|
||||||
|
@@ -42,27 +42,30 @@ typedef struct {
|
||||||
|
Int size; /* bytes */
|
||||||
|
Int assoc;
|
||||||
|
Int line_size; /* bytes */
|
||||||
|
- Int sets;
|
||||||
|
Int sets_min_1;
|
||||||
|
Int line_size_bits;
|
||||||
|
Int tag_shift;
|
||||||
|
- HChar desc_line[128]; /* large enough */
|
||||||
|
UWord* tags;
|
||||||
|
-} cache_t2;
|
||||||
|
+ HChar desc_line[128];
|
||||||
|
+} cache_t2
|
||||||
|
+#ifdef __GNUC__
|
||||||
|
+__attribute__ ((aligned (8 * sizeof (Int))))
|
||||||
|
+#endif
|
||||||
|
+;
|
||||||
|
|
||||||
|
/* By this point, the size/assoc/line_size has been checked. */
|
||||||
|
static void cachesim_initcache(cache_t config, cache_t2* c)
|
||||||
|
{
|
||||||
|
- Int i;
|
||||||
|
+ Int sets;
|
||||||
|
|
||||||
|
c->size = config.size;
|
||||||
|
c->assoc = config.assoc;
|
||||||
|
c->line_size = config.line_size;
|
||||||
|
|
||||||
|
- c->sets = (c->size / c->line_size) / c->assoc;
|
||||||
|
- c->sets_min_1 = c->sets - 1;
|
||||||
|
+ sets = (c->size / c->line_size) / c->assoc;
|
||||||
|
+ c->sets_min_1 = sets - 1;
|
||||||
|
c->line_size_bits = VG_(log2)(c->line_size);
|
||||||
|
- c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
|
||||||
|
+ c->tag_shift = c->line_size_bits + VG_(log2)(sets);
|
||||||
|
|
||||||
|
if (c->assoc == 1) {
|
||||||
|
VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
|
||||||
|
@@ -72,11 +75,8 @@ static void cachesim_initcache(cache_t c
|
||||||
|
c->size, c->line_size, c->assoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
- c->tags = VG_(malloc)("cg.sim.ci.1",
|
||||||
|
- sizeof(UWord) * c->sets * c->assoc);
|
||||||
|
-
|
||||||
|
- for (i = 0; i < c->sets * c->assoc; i++)
|
||||||
|
- c->tags[i] = 0;
|
||||||
|
+ c->tags = VG_(calloc)("cg.sim.ci.1",
|
||||||
|
+ sizeof(UWord), sets * c->assoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This attribute forces GCC to inline the function, getting rid of a
|
15
SOURCES/valgrind-3.9.0-helgrind-race-supp.patch
Normal file
15
SOURCES/valgrind-3.9.0-helgrind-race-supp.patch
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
--- valgrind/glibc-2.34567-NPTL-helgrind.supp.jj 2009-08-19 15:37:48.000000000 +0200
|
||||||
|
+++ valgrind/glibc-2.34567-NPTL-helgrind.supp 2009-10-21 16:46:31.000000000 +0200
|
||||||
|
@@ -88,6 +88,12 @@
|
||||||
|
obj:*/lib*/libpthread-2.*so*
|
||||||
|
}
|
||||||
|
{
|
||||||
|
+ helgrind-glibc2X-102a
|
||||||
|
+ Helgrind:Race
|
||||||
|
+ fun:mythread_wrapper
|
||||||
|
+ obj:*vgpreload_helgrind*.so
|
||||||
|
+}
|
||||||
|
+{
|
||||||
|
helgrind-glibc2X-103
|
||||||
|
Helgrind:Race
|
||||||
|
fun:pthread_cond_*@@GLIBC_2.*
|
28
SOURCES/valgrind-3.9.0-ldso-supp.patch
Normal file
28
SOURCES/valgrind-3.9.0-ldso-supp.patch
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
--- valgrind/glibc-2.X.supp.in.jj 2011-10-26 23:24:45.000000000 +0200
|
||||||
|
+++ valgrind/glibc-2.X.supp.in 2012-05-07 10:55:20.395942656 +0200
|
||||||
|
@@ -124,7 +124,7 @@
|
||||||
|
glibc-2.5.x-on-SUSE-10.2-(PPC)-2a
|
||||||
|
Memcheck:Cond
|
||||||
|
fun:index
|
||||||
|
- obj:*ld-@GLIBC_VERSION@.*.so
|
||||||
|
+ obj:*ld-@GLIBC_VERSION@*.so
|
||||||
|
}
|
||||||
|
{
|
||||||
|
glibc-2.5.x-on-SuSE-10.2-(PPC)-2b
|
||||||
|
@@ -136,14 +136,14 @@
|
||||||
|
glibc-2.5.5-on-SuSE-10.2-(PPC)-2c
|
||||||
|
Memcheck:Addr4
|
||||||
|
fun:index
|
||||||
|
- obj:*ld-@GLIBC_VERSION@.*.so
|
||||||
|
+ obj:*ld-@GLIBC_VERSION@*.so
|
||||||
|
}
|
||||||
|
{
|
||||||
|
glibc-2.3.5-on-SuSE-10.1-(PPC)-3
|
||||||
|
Memcheck:Addr4
|
||||||
|
fun:*wordcopy_fwd_dest_aligned*
|
||||||
|
fun:mem*cpy
|
||||||
|
- obj:*lib*@GLIBC_VERSION@.*.so
|
||||||
|
+ obj:*lib*@GLIBC_VERSION@*.so
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
1844
SPECS/valgrind.spec
Normal file
1844
SPECS/valgrind.spec
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user