import valgrind-3.14.0-10.el8_0

CentOS Sources, 2019-06-18 12:42:06 -04:00 (committed by Andrew Lukoshko)
commit 6ee00ced0a
35 changed files with 10126 additions and 0 deletions

.gitignore (vendored, new file)

@@ -0,0 +1 @@
SOURCES/valgrind-3.14.0.tar.bz2

.valgrind.metadata (new file)

@@ -0,0 +1 @@
182afd405b92ddb6f52c6729e848eacf4b1daf46 SOURCES/valgrind-3.14.0.tar.bz2


@@ -0,0 +1,24 @@
commit 43fe4bc236d667257eeebfb4f6bcbe2b92aea455
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 14 14:32:27 2018 +0100
arm64: Fix PTRACE_TRACEME memcheck/tests/linux/getregset.vgtest testcase.
The sys_ptrace post didn't mark the thread as being in traceme mode.
This occasionally would make the memcheck/tests/linux/getregset.vgtest
testcase fail. With this patch it reliably passes.
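To illustrate the behaviour being tracked (a minimal sketch, not the
actual testcase): a child announces itself with PTRACE_TRACEME, so the
sys_ptrace POST handler must record the thread as traced for later
requests such as PTRACE_GETREGSET to be handled correctly.

#include <signal.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
   pid_t pid = fork();
   if (pid == 0) {                       /* child */
      ptrace(PTRACE_TRACEME, 0, 0, 0);   /* POST(sys_ptrace) must mark
                                            this thread as traced */
      raise(SIGSTOP);                    /* stop so the parent can
                                            inspect our registers */
      _exit(0);
   }
   waitpid(pid, 0, 0);                   /* parent sees the child stop */
   ptrace(PTRACE_CONT, pid, 0, 0);       /* resume the child */
   waitpid(pid, 0, 0);                   /* reap it */
   return 0;
}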
diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c
index 9ef54b4..650f5b9 100644
--- a/coregrind/m_syswrap/syswrap-arm64-linux.c
+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
@@ -499,6 +499,9 @@ PRE(sys_ptrace)
POST(sys_ptrace)
{
switch (ARG1) {
+ case VKI_PTRACE_TRACEME:
+ ML_(linux_POST_traceme)(tid);
+ break;
case VKI_PTRACE_PEEKTEXT:
case VKI_PTRACE_PEEKDATA:
case VKI_PTRACE_PEEKUSR:


@@ -0,0 +1,18 @@
commit 27fe22378da38424102c5292b782cacdd9d7b9e4
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 12:09:03 2018 +0100
Add support for Iop_{Sar,Shr}8 on ppc. --expensive-definedness-checks=yes needs them.
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 5242176..750cf8d 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -1528,7 +1528,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
True/*32bit shift*/,
tmp, tmp, amt));
r_srcL = tmp;
- vassert(0); /* AWAITING TEST CASE */
}
}
/* Only 64 expressions need 64bit shifts,


@@ -0,0 +1,59 @@
commit be7a73004583aab5d4c97cf55276ca58d5b3090b
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 14:15:28 2018 +0100
Mark helper regs defined in final_tidyup before freeres_wrapper call.
In final_tidyup we setup the guest to call the freeres_wrapper, which
will (possibly) call __gnu_cxx::__freeres() and/or __libc_freeres().
In a couple of cases (ppc64be, ppc64le and mips32) this involves setting
up one or more helper registers. Since we setup these guest registers
we should make sure to mark them as fully defined. Otherwise we might
see spurious warnings about undefined value usage if the guest register
happened to not be fully defined before.
This fixes PR402006.
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 00702fc..22872a2 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2304,22 +2304,35 @@ static void final_tidyup(ThreadId tid)
"Caught __NR_exit; running %s wrapper\n", msgs[to_run - 1]);
}
- /* set thread context to point to freeres_wrapper */
- /* ppc64be-linux note: freeres_wrapper gives us the real
+ /* Set thread context to point to freeres_wrapper.
+ ppc64be-linux note: freeres_wrapper gives us the real
function entry point, not a fn descriptor, so can use it
directly. However, we need to set R2 (the toc pointer)
appropriately. */
VG_(set_IP)(tid, freeres_wrapper);
+
# if defined(VGP_ppc64be_linux)
VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR2),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
# elif defined(VGP_ppc64le_linux)
/* setting GPR2 but not really needed, GPR12 is needed */
VG_(threads)[tid].arch.vex.guest_GPR2 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR2),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
VG_(threads)[tid].arch.vex.guest_GPR12 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR12),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR12));
# endif
/* mips-linux note: we need to set t9 */
# if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
VG_(threads)[tid].arch.vex.guest_r25 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestMIPS32State, guest_r25),
+ sizeof(VG_(threads)[tid].arch.vex.guest_r25));
# endif
/* Pass a parameter to freeres_wrapper(). */


@@ -0,0 +1,81 @@
commit 7f1dd9d5aec1f1fd4eb0ae3a311358a914f1d73f
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 10:18:29 2018 +0100
get_otrack_shadow_offset_wrk for ppc32 and ppc64: add missing cases for XER_OV32, XER_CA32 and C_FPCC.
The missing cases were discovered whilst testing fixes for bug 386945, but are
otherwise unrelated to that bug.
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 5ed101f..4ce746e 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -120,11 +120,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
Int o = offset;
tl_assert(sz > 0);
-#if defined(VGA_ppc64be)
+# if defined(VGA_ppc64be)
tl_assert(host_is_big_endian());
-#elif defined(VGA_ppc64le)
+# elif defined(VGA_ppc64le)
tl_assert(host_is_little_endian());
-#endif
+# endif
if (sz == 8 || sz == 4) {
/* The point of this is to achieve
@@ -132,11 +132,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
return GOF(GPRn);
by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
*/
-#if defined(VGA_ppc64le)
+# if defined(VGA_ppc64le)
Int ox = o;
-#else
+# else
Int ox = sz == 8 ? o : (o - 4);
-#endif
+# endif
if (ox == GOF(GPR0)) return ox;
if (ox == GOF(GPR1)) return ox;
if (ox == GOF(GPR2)) return ox;
@@ -240,11 +240,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(VSR31) && sz == 8) return o;
/* For the various byte sized XER/CR pieces, use offset 8
- in VSR0 .. VSR19. */
+ in VSR0 .. VSR21. */
tl_assert(SZB(VSR0) == 16);
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
@@ -388,6 +390,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
if (o == GOF(FPROUND) && sz == 1) return -1;
if (o == GOF(DFPROUND) && sz == 1) return -1;
+ if (o == GOF(C_FPCC) && sz == 1) return -1;
if (o == GOF(VRSAVE) && sz == 4) return -1;
if (o == GOF(EMNOTE) && sz == 4) return -1;
if (o == GOF(CMSTART) && sz == 4) return -1;
@@ -440,11 +443,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(VSR31) && sz == 8) return o;
/* For the various byte sized XER/CR pieces, use offset 8
- in VSR0 .. VSR19. */
+ in VSR0 .. VSR21. */
tl_assert(SZB(VSR0) == 16);
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);


@@ -0,0 +1,654 @@
commit a0d97e88ec6d71239d30a5a4b2b129e094150873
Author: Mark Wielaard <mark@klomp.org>
Date: Thu Dec 6 20:52:22 2018 +0100
Bug 401822 Fix asm constraints for ppc64 jm-vmx jm-insns.c test.
The mfvscr and vor instructions in jm-insns.c had a "=vr" constraint.
This should have been an "=v" constraint. This resolves assembler
warnings and the testcase failure on ppc64le with gcc 8.2 and
binutils 2.30.
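For context, a sketch of the difference (assumed GCC constraint
semantics, not part of the patch): a multi-letter constraint string
lists alternatives, so "=vr" permits either an AltiVec register ("v")
or a general-purpose register ("r"), and mfvscr/vor cannot encode a
GPR operand.

#include <altivec.h>   /* ppc only; build with -maltivec */

static vector unsigned int read_vscr(void)
{
   vector unsigned int vscr;
   /* "=v" forces an AltiVec register, which mfvscr requires. With
      "=vr" the compiler was free to pick a GPR, producing the
      assembler warnings mentioned above. */
   __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
   return vscr;
}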
diff --git a/none/tests/ppc32/jm-insns.c b/none/tests/ppc32/jm-insns.c
index e1a7da9..be02425 100644
--- a/none/tests/ppc32/jm-insns.c
+++ b/none/tests/ppc32/jm-insns.c
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
for (i=0; i<nb_viargs; i++) {
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
vec_in = (vector unsigned int)viargs[i];
vec_out = (vector unsigned int){ 0,0,0,0 };
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
diff --git a/none/tests/ppc64/jm-insns.c b/none/tests/ppc64/jm-insns.c
index e1a7da9..be02425 100644
--- a/none/tests/ppc64/jm-insns.c
+++ b/none/tests/ppc64/jm-insns.c
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
for (i=0; i<nb_viargs; i++) {
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
vec_in = (vector unsigned int)viargs[i];
vec_out = (vector unsigned int){ 0,0,0,0 };
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));


@@ -0,0 +1,12 @@
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index c24db91..1e770b3 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -8022,6 +8022,7 @@ static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
use info. */
switch (at->tag) {
case Iex_GSPTR:
+ case Iex_VECRET:
case Iex_Const:
return;
case Iex_RdTmp: {


@@ -0,0 +1,453 @@
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:28:42 2018 +0100
Add Memcheck support for IROps added in 42719898.
memcheck/mc_translate.c:
Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.
doCmpORD: for the cases of a signed comparison against zero, compute
definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
bits, do it exactly accurately.
expensiveCountTrailingZeroes: no functional change. Re-analyse/verify and add
comments.
expensiveCountLeadingZeroes: add. Very similar to
expensiveCountTrailingZeroes.
Add some comments to mark unary ops which are self-shadowing.
Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.
Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.
memcheck/tests/vbit-test/irops.c:
Add dummy new entries for all new IROps, just enough to make it compile and
run.
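A plain-C model of the Right operation may help (a sketch of the
semantics only; the patch itself builds IR): every 1-bit is smeared
rightwards, so the result has ones from the leftmost 1 of the input
down to bit 0.

static unsigned int right32(unsigned int x)
{
   /* mirrors mkRight32: a1 |= (a1 >>u i) for i = 1,2,4,8,16 */
   for (int i = 1; i <= 16; i *= 2)
      x |= x >> i;
   return x;
}

/* Example: right32(0x00100000) == 0x001fffff, and
   ~(right32(x) >> 1) then covers exactly the leading zeroes plus the
   leftmost 1 -- the only input bits that can influence clz(x). */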
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 68a2ab3..c24db91 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
+/* --------- The Right-family of operations. --------- */
+
+/* Unfortunately these are a lot more expensive then their Left
+ counterparts. Fortunately they are only very rarely used -- only for
+ count-leading-zeroes instrumentation. */
+
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
+{
+ for (Int i = 1; i <= 16; i *= 2) {
+ // a1 |= (a1 >>u i)
+ IRAtom* tmp
+ = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
+ a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
+ }
+ return a1;
+}
+
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
+{
+ for (Int i = 1; i <= 32; i *= 2) {
+ // a1 |= (a1 >>u i)
+ IRAtom* tmp
+ = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
+ a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
+ }
+ return a1;
+}
+
/* --------- 'Improvement' functions for AND/OR. --------- */
/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv* mce,
IRAtom* xxhash, IRAtom* yyhash,
IRAtom* xx, IRAtom* yy )
{
- Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
- Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
- IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
- IROp opAND = m64 ? Iop_And64 : Iop_And32;
- IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
- IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
- IRType ty = m64 ? Ity_I64 : Ity_I32;
- Int width = m64 ? 64 : 32;
+ Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
+ Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
+ IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
+ IROp opAND = m64 ? Iop_And64 : Iop_And32;
+ IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
+ IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
+ IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
+ IRType ty = m64 ? Ity_I64 : Ity_I32;
+ Int width = m64 ? 64 : 32;
Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
- IRAtom* threeLeft1 = NULL;
- IRAtom* sevenLeft1 = NULL;
-
tl_assert(isShadowAtom(mce,xxhash));
tl_assert(isShadowAtom(mce,yyhash));
tl_assert(isOriginalAtom(mce,xx));
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv* mce,
/* fancy interpretation */
/* if yy is zero, then it must be fully defined (zero#). */
tl_assert(isZero(yyhash));
- threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
+ // This is still inaccurate, but I don't think it matters, since
+ // nobody writes code of the form
+ // "is <partially-undefined-value> signedly greater than zero?".
+ // We therefore simply declare "x >s 0" to be undefined if any bit in
+ // x is undefined. That's clearly suboptimal in some cases. Eg, if
+ // the highest order bit is a defined 1 then x is negative so it
+ // doesn't matter whether the remaining bits are defined or not.
+ IRAtom* t_0_gt_0_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opAND,
+ mkPCastTo(mce,ty, xxhash),
+ m64 ? mkU64(1<<2) : mkU32(1<<2)
+ ));
+ // For "x <s 0", we can just copy the definedness of the top bit of x
+ // and we have a precise result.
+ IRAtom* t_lt_0_0_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opSHL,
+ assignNew(
+ 'V', mce,ty,
+ binop(opSHR, xxhash, mkU8(width-1))),
+ mkU8(3)
+ ));
+ // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
+ IRAtom* t_0_0_eq_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opSHL,
+ assignNew('V', mce,ty,
+ unop(
+ op1UtoWS,
+ expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
+ ),
+ mkU8(1)
+ ));
return
binop(
opOR,
- assignNew(
- 'V', mce,ty,
- binop(
- opAND,
- mkPCastTo(mce,ty, xxhash),
- threeLeft1
- )),
- assignNew(
- 'V', mce,ty,
- binop(
- opSHL,
- assignNew(
- 'V', mce,ty,
- binop(opSHR, xxhash, mkU8(width-1))),
- mkU8(3)
- ))
- );
+ assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
+ t_0_0_eq_0
+ );
} else {
/* standard interpretation */
- sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
+ IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
return
binop(
opAND,
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
tl_assert(sameKindedAtoms(atom,vatom));
switch (czop) {
- case Iop_Ctz32:
+ case Iop_Ctz32: case Iop_CtzNat32:
ty = Ity_I32;
xorOp = Iop_Xor32;
subOp = Iop_Sub32;
andOp = Iop_And32;
one = mkU32(1);
break;
- case Iop_Ctz64:
+ case Iop_Ctz64: case Iop_CtzNat64:
ty = Ity_I64;
xorOp = Iop_Xor64;
subOp = Iop_Sub64;
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
// improver = atom ^ (atom - 1)
//
- // That is, improver has its low ctz(atom) bits equal to one;
- // higher bits (if any) equal to zero.
+ // That is, improver has its low ctz(atom)+1 bits equal to one;
+ // higher bits (if any) equal to zero. So it's exactly the right
+ // mask to use to remove the irrelevant undefined input bits.
+ /* Here are some examples:
+ atom = U...U 1 0...0
+ atom-1 = U...U 0 1...1
+ ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
+ actually influence the result
+ A boundary case
+ atom = 0...0
+ atom-1 = 1...1
+ ^ed = 11111, also a correct mask for the input: all input bits
+ are relevant
+ Another boundary case
+ atom = 1..1 1
+ atom-1 = 1..1 0
+ ^ed = 0..0 1, also a correct mask: only the rightmost input bit
+ is relevant
+ Now with misc U bits interspersed:
+ atom = U...U 1 0 U...U 0 1 0...0
+ atom-1 = U...U 1 0 U...U 0 0 1...1
+ ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
+ (Per re-check/analysis of 14 Nov 2018)
+ */
improver = assignNew('V', mce,ty,
binop(xorOp,
atom,
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
// improved = vatom & improver
//
- // That is, treat any V bits above the first ctz(atom) bits as
- // "defined".
+ // That is, treat any V bits to the left of the rightmost ctz(atom)+1
+ // bits as "defined".
+ improved = assignNew('V', mce, ty,
+ binop(andOp, vatom, improver));
+
+ // Return pessimizing cast of improved.
+ return mkPCastTo(mce, ty, improved);
+}
+
+static
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
+ IRAtom* atom, IRAtom* vatom )
+{
+ IRType ty;
+ IROp shrOp, notOp, andOp;
+ IRAtom* (*mkRight)(MCEnv*, IRAtom*);
+ IRAtom *improver, *improved;
+ tl_assert(isShadowAtom(mce,vatom));
+ tl_assert(isOriginalAtom(mce,atom));
+ tl_assert(sameKindedAtoms(atom,vatom));
+
+ switch (czop) {
+ case Iop_Clz32: case Iop_ClzNat32:
+ ty = Ity_I32;
+ shrOp = Iop_Shr32;
+ notOp = Iop_Not32;
+ andOp = Iop_And32;
+ mkRight = mkRight32;
+ break;
+ case Iop_Clz64: case Iop_ClzNat64:
+ ty = Ity_I64;
+ shrOp = Iop_Shr64;
+ notOp = Iop_Not64;
+ andOp = Iop_And64;
+ mkRight = mkRight64;
+ break;
+ default:
+ ppIROp(czop);
+ VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
+ }
+
+ // This is in principle very similar to how expensiveCountTrailingZeroes
+ // works. That function computed an "improver", which it used to mask
+ // off all but the rightmost 1-bit and the zeroes to the right of it,
+ // hence removing irrelevant bits from the input. Here, we play the
+ // exact same game but with the left-vs-right roles interchanged.
+ // Unfortunately calculation of the improver in this case is
+ // significantly more expensive.
+ //
+ // improver = ~(RIGHT(atom) >>u 1)
+ //
+ // That is, improver has its upper clz(atom)+1 bits equal to one;
+ // lower bits (if any) equal to zero. So it's exactly the right
+ // mask to use to remove the irrelevant undefined input bits.
+ /* Here are some examples:
+ atom = 0...0 1 U...U
+ R(atom) = 0...0 1 1...1
+ R(atom) >>u 1 = 0...0 0 1...1
+ ~(R(atom) >>u 1) = 1...1 1 0...0
+ which correctly describes which bits of |atom|
+ actually influence the result
+ A boundary case
+ atom = 0...0
+ R(atom) = 0...0
+ R(atom) >>u 1 = 0...0
+ ~(R(atom) >>u 1) = 1...1
+ also a correct mask for the input: all input bits
+ are relevant
+ Another boundary case
+ atom = 1 1..1
+ R(atom) = 1 1..1
+ R(atom) >>u 1 = 0 1..1
+ ~(R(atom) >>u 1) = 1 0..0
+ also a correct mask: only the leftmost input bit
+ is relevant
+ Now with misc U bits interspersed:
+ atom = 0...0 1 U...U 0 1 U...U
+ R(atom) = 0...0 1 1...1 1 1 1...1
+ R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
+ ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
+ (Per initial implementation of 15 Nov 2018)
+ */
+ improver = mkRight(mce, atom);
+ improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
+ improver = assignNew('V', mce, ty, unop(notOp, improver));
+
+ // improved = vatom & improver
+ //
+ // That is, treat any V bits to the right of the leftmost clz(atom)+1
+ // bits as "defined".
improved = assignNew('V', mce, ty,
binop(andOp, vatom, improver));
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_RecipEst32F0x4:
return unary32F0x4(mce, vatom);
+ // These are self-shadowing.
case Iop_32UtoV128:
case Iop_64UtoV128:
case Iop_Dup8x16:
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_MulI128by10Carry:
case Iop_F16toF64x2:
case Iop_F64toF16x2:
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
return vatom;
case Iop_I32StoF128: /* signed I32 -> F128 */
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_RoundF64toF64_NegINF:
case Iop_RoundF64toF64_PosINF:
case Iop_RoundF64toF64_ZERO:
- case Iop_Clz64:
case Iop_D32toD64:
case Iop_I32StoD64:
case Iop_I32UtoD64:
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_D64toD128:
return mkPCastTo(mce, Ity_I128, vatom);
- case Iop_Clz32:
case Iop_TruncF64asF32:
case Iop_NegF32:
case Iop_AbsF32:
case Iop_F16toF32:
return mkPCastTo(mce, Ity_I32, vatom);
- case Iop_Ctz32:
- case Iop_Ctz64:
+ case Iop_Ctz32: case Iop_CtzNat32:
+ case Iop_Ctz64: case Iop_CtzNat64:
return expensiveCountTrailingZeroes(mce, op, atom, vatom);
+ case Iop_Clz32: case Iop_ClzNat32:
+ case Iop_Clz64: case Iop_ClzNat64:
+ return expensiveCountLeadingZeroes(mce, op, atom, vatom);
+
+ // PopCount32: this is slightly pessimistic. It is true that the
+ // result depends on all input bits, so that aspect of the PCast is
+ // correct. However, regardless of the input, only the lowest 5 bits
+ // out of the output can ever be undefined. So we could actually
+ // "improve" the results here by marking the top 27 bits of output as
+ // defined. A similar comment applies for PopCount64.
+ case Iop_PopCount32:
+ return mkPCastTo(mce, Ity_I32, vatom);
+ case Iop_PopCount64:
+ return mkPCastTo(mce, Ity_I64, vatom);
+
+ // These are self-shadowing.
case Iop_1Uto64:
case Iop_1Sto64:
case Iop_8Uto64:
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_V256to64_2: case Iop_V256to64_3:
return assignNew('V', mce, Ity_I64, unop(op, vatom));
+ // These are self-shadowing.
case Iop_64to32:
case Iop_64HIto32:
case Iop_1Uto32:
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_16Sto32:
case Iop_8Sto32:
case Iop_V128to32:
+ case Iop_Reverse8sIn32_x1:
return assignNew('V', mce, Ity_I32, unop(op, vatom));
+ // These are self-shadowing.
case Iop_8Sto16:
case Iop_8Uto16:
case Iop_32to16:
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_GetMSBs8x16:
return assignNew('V', mce, Ity_I16, unop(op, vatom));
+ // These are self-shadowing.
case Iop_1Uto8:
case Iop_1Sto8:
case Iop_16to8:
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_Not16:
case Iop_Not8:
case Iop_Not1:
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
return vatom;
case Iop_CmpNEZ8x8:
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_Ctz64x2:
return mkPCast64x2(mce, vatom);
+ // This is self-shadowing.
case Iop_PwBitMtxXpose64x2:
return assignNew('V', mce, Ity_V128, unop(op, vatom));
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
index bfd82fc..e8bf67d 100644
--- a/memcheck/tests/vbit-test/irops.c
+++ b/memcheck/tests/vbit-test/irops.c
@@ -111,6 +111,12 @@ static irop_t irops[] = {
{ DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
{ DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
{ DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
+ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
{ DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
{ DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
{ DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
@@ -336,6 +342,7 @@ static irop_t irops[] = {
{ DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
{ DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
{ DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
+ { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
/* ------------------ 64-bit SIMD FP ------------------------ */
{ DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
{ DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },


@@ -0,0 +1,124 @@
commit 4271989815b5fc933c1e29bc75507c2726dc3738
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 10:52:33 2018 +0100
Add some new IROps to support improved Memcheck analysis of strlen etc.
This is part of the fix for bug 386945. It adds the following IROps, plus
their supporting type- and printing- fragments:
Iop_Reverse8sIn32_x1: 32-bit byteswap. A fancy name, but it is consistent
with naming for the other swapping IROps that already exist.
Iop_PopCount64, Iop_PopCount32: population count
Iop_ClzNat64, Iop_ClzNat32, Iop_CtzNat64, Iop_CtzNat32: counting leading and
trailing zeroes, with "natural" (Nat) semantics for a zero input, meaning, in
the case of zero input, return the number of bits in the word. These
functionally overlap with the existing Iop_Clz64, Iop_Clz32, Iop_Ctz64,
Iop_Ctz32. The existing operations are undefined in case of a zero input.
Adding these new variants avoids the complexity of having to change the
declared semantics of the existing operations. Instead they are deprecated
but still available for use.
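As a reference for the intended semantics (a sketch, not the VEX
implementation), the Nat variants return the word width for a zero
input, and PopCount counts set bits:

static int clz_nat32(unsigned int x)
{
   int n = 0;
   if (x == 0)
      return 32;                  /* "natural" result for zero input */
   while (!(x & 0x80000000u)) {   /* walk until the leading 1 is at bit 31 */
      x <<= 1;
      n++;
   }
   return n;
}

static int popcount32(unsigned int x)
{
   int n = 0;
   for (; x != 0; x &= x - 1)     /* clear the lowest set bit each round */
      n++;
   return n;
}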
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 823b6be..3221033 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -194,6 +194,14 @@ void ppIROp ( IROp op )
case Iop_Ctz64: vex_printf("Ctz64"); return;
case Iop_Ctz32: vex_printf("Ctz32"); return;
+ case Iop_ClzNat64: vex_printf("ClzNat64"); return;
+ case Iop_ClzNat32: vex_printf("ClzNat32"); return;
+ case Iop_CtzNat64: vex_printf("CtzNat64"); return;
+ case Iop_CtzNat32: vex_printf("CtzNat32"); return;
+
+ case Iop_PopCount64: vex_printf("PopCount64"); return;
+ case Iop_PopCount32: vex_printf("PopCount32"); return;
+
case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
@@ -395,6 +403,7 @@ void ppIROp ( IROp op )
case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
+ case Iop_Reverse8sIn32_x1: vex_printf("Reverse8sIn32_x1"); return;
case Iop_CmpF64: vex_printf("CmpF64"); return;
@@ -2719,6 +2728,7 @@ void typeOfPrimop ( IROp op,
UNARY(Ity_I16, Ity_I16);
case Iop_Not32:
case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
+ case Iop_Reverse8sIn32_x1:
UNARY(Ity_I32, Ity_I32);
case Iop_Not64:
@@ -2782,9 +2792,13 @@ void typeOfPrimop ( IROp op,
BINARY(Ity_I64,Ity_I64, Ity_I128);
case Iop_Clz32: case Iop_Ctz32:
+ case Iop_ClzNat32: case Iop_CtzNat32:
+ case Iop_PopCount32:
UNARY(Ity_I32, Ity_I32);
case Iop_Clz64: case Iop_Ctz64:
+ case Iop_ClzNat64: case Iop_CtzNat64:
+ case Iop_PopCount64:
UNARY(Ity_I64, Ity_I64);
case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 17bcb55..93fa5ac 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -452,12 +452,21 @@ typedef
Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
- /* Wierdo integer stuff */
+ /* Counting bits */
+ /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of zero.
+ You must ensure they are never given a zero argument. As of
+ 2018-Nov-14 they are deprecated. Try to use the Nat variants
+ immediately below, if you can.
+ */
Iop_Clz64, Iop_Clz32, /* count leading zeroes */
Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */
- /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
- zero. You must ensure they are never given a zero argument.
- */
+ /* Count leading/trailing zeroes, with "natural" semantics for the
+ case where the input is zero: then the result is the number of bits
+ in the word. */
+ Iop_ClzNat64, Iop_ClzNat32,
+ Iop_CtzNat64, Iop_CtzNat32,
+ /* Population count -- compute the number of 1 bits in the argument. */
+ Iop_PopCount64, Iop_PopCount32,
/* Standard integer comparisons */
Iop_CmpLT32S, Iop_CmpLT64S,
@@ -831,6 +840,9 @@ typedef
/* MISC (vector integer cmp != 0) */
Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
+ /* Byte swap in a 32-bit word */
+ Iop_Reverse8sIn32_x1,
+
/* ------------------ 64-bit SIMD FP ------------------------ */
/* Convertion to/from int */
@@ -1034,8 +1046,9 @@ typedef
Iop_Slice64, // (I64, I64, I8) -> I64
/* REVERSE the order of chunks in vector lanes. Chunks must be
- smaller than the vector lanes (obviously) and so may be 8-,
- 16- and 32-bit in size. */
+ smaller than the vector lanes (obviously) and so may be 8-, 16- and
+ 32-bit in size. Note that the degenerate case,
+ Iop_Reverse8sIn64_x1, is a simply a vanilla byte-swap. */
/* Examples:
Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]


@@ -0,0 +1,256 @@
From 2c1f016e634bf79faf45e81c14c955c711bc202f Mon Sep 17 00:00:00 2001
From: Mark Wielaard <mark@klomp.org>
Date: Mon, 31 Dec 2018 22:26:31 +0100
Subject: [PATCH] Bug 402519 - POWER 3.0 addex instruction incorrectly
implemented
addex uses OV as carry in and carry out. For all other instructions
OV is the signed overflow flag, and instructions like adde use CA
as the carry.
Replace set_XER_OV_OV32 with set_XER_OV_OV32_ADDEX, which calls
calculate_XER_CA_64 and calculate_XER_CA_32, but with OV as the
carry input, and sets OV and OV32.
Enable test_addex in none/tests/ppc64/test_isa_3_0.c and update
the expected output. test_addex would fail to match the expected
output before this patch.
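A small model of the corrected behaviour (an illustrative sketch, not
the VEX code): addex performs an add with OV as both carry-in and
carry-out, the same role CA plays for adde.

static unsigned long addex_model(unsigned long a, unsigned long b,
                                 unsigned *ov /* carry in and out */)
{
   unsigned long sum = a + b + *ov;
   /* carry-out of a + b + carry-in, computed without widening */
   *ov = (sum < a) || (*ov && sum == a);
   return sum;
}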
---
NEWS | 1 +
VEX/priv/guest_ppc_toIR.c | 52 ++++++++++++++---------
none/tests/ppc64/test_isa_3_0.c | 3 +-
none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE | 36 ++++++++++------
4 files changed, 58 insertions(+), 34 deletions(-)
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 18df822..d685383 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -2645,21 +2645,6 @@ static void copy_OV_to_OV32( void ) {
putXER_OV32( getXER_OV() );
}
-static void set_XER_OV_OV32 ( IRType ty, UInt op, IRExpr* res,
- IRExpr* argL, IRExpr* argR )
-{
- if (ty == Ity_I32) {
- set_XER_OV_OV32_32( op, res, argL, argR );
- } else {
- IRExpr* xer_ov_32;
- set_XER_OV_64( op, res, argL, argR );
- xer_ov_32 = calculate_XER_OV_32( op, unop(Iop_64to32, res),
- unop(Iop_64to32, argL),
- unop(Iop_64to32, argR));
- putXER_OV32( unop(Iop_32to8, xer_ov_32) );
- }
-}
-
static void set_XER_OV_OV32_SO ( IRType ty, UInt op, IRExpr* res,
IRExpr* argL, IRExpr* argR )
{
@@ -3005,6 +2990,33 @@ static void set_XER_CA_CA32 ( IRType ty, UInt op, IRExpr* res,
}
}
+/* Used only by addex instruction, which uses and sets OV as carry. */
+static void set_XER_OV_OV32_ADDEX ( IRType ty, IRExpr* res,
+ IRExpr* argL, IRExpr* argR,
+ IRExpr* old_ov )
+{
+ if (ty == Ity_I32) {
+ IRTemp xer_ov = newTemp(Ity_I32);
+ assign ( xer_ov, unop(Iop_32to8,
+ calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
+ res, argL, argR, old_ov ) ) );
+ putXER_OV( mkexpr (xer_ov) );
+ putXER_OV32( mkexpr (xer_ov) );
+ } else {
+ IRExpr *xer_ov;
+ IRExpr* xer_ov_32;
+ xer_ov = calculate_XER_CA_64( PPCG_FLAG_OP_ADDE,
+ res, argL, argR, old_ov );
+ putXER_OV( unop(Iop_32to8, xer_ov) );
+ xer_ov_32 = calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
+ unop(Iop_64to32, res),
+ unop(Iop_64to32, argL),
+ unop(Iop_64to32, argR),
+ unop(Iop_64to32, old_ov) );
+ putXER_OV32( unop(Iop_32to8, xer_ov_32) );
+ }
+}
+
/*------------------------------------------------------------*/
@@ -5094,16 +5106,18 @@ static Bool dis_int_arith ( UInt theInstr )
}
case 0xAA: {// addex (Add Extended alternate carry bit Z23-form)
+ IRTemp old_xer_ov = newTemp(ty);
DIP("addex r%u,r%u,r%u,%d\n", rD_addr, rA_addr, rB_addr, (Int)flag_OE);
+ assign( old_xer_ov, mkWidenFrom32(ty, getXER_OV_32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
binop( mkSzOp(ty, Iop_Add8), mkexpr(rB),
- mkWidenFrom8( ty, getXER_OV(), False ) ) ) );
+ mkexpr(old_xer_ov) ) ) );
/* CY bit is same as OE bit */
if (flag_OE == 0) {
- /* Exception, do not set SO bit */
- set_XER_OV_OV32( ty, PPCG_FLAG_OP_ADDE,
- mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ /* Exception, do not set SO bit and set OV from carry. */
+ set_XER_OV_OV32_ADDEX( ty, mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkexpr(old_xer_ov) );
} else {
/* CY=1, 2 and 3 (AKA flag_OE) are reserved */
vex_printf("addex instruction, CY = %d is reserved.\n", flag_OE);
diff --git a/none/tests/ppc64/test_isa_3_0.c b/none/tests/ppc64/test_isa_3_0.c
index 2d13505..1c2cda3 100644
--- a/none/tests/ppc64/test_isa_3_0.c
+++ b/none/tests/ppc64/test_isa_3_0.c
@@ -286,7 +286,7 @@ static test_list_t testgroup_ia_ops_two[] = {
{ &test_moduw, "moduw" },
{ &test_modsd, "modsd" },
{ &test_modud, "modud" },
- //{ &test_addex, "addex" },
+ { &test_addex, "addex" },
{ NULL , NULL },
};
@@ -2741,7 +2741,6 @@ static void testfunction_gpr_vector_logical_one (const char* instruction_name,
* rt, xa
*/
int i;
- int t;
volatile HWord_t res;
VERBOSE_FUNCTION_CALLOUT
diff --git a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
index 152ff28..cc0e88e 100644
--- a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
+++ b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
@@ -40,7 +40,17 @@ modud ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000)
modud ffffffffffffffff, 0000001cbe991def => 000000043eb0c0b2 (00000000)
modud ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
-All done. Tested 4 different instructions
+addex 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
+addex 0000000000000000, 0000001cbe991def => 0000001cbe991def (00000000)
+addex 0000000000000000, ffffffffffffffff => ffffffffffffffff (00000000)
+addex 0000001cbe991def, 0000000000000000 => 0000001cbe991def (00000000)
+addex 0000001cbe991def, 0000001cbe991def => 000000397d323bde (00000000) OV32
+addex 0000001cbe991def, ffffffffffffffff => 0000001cbe991dee (00000000) OV OV32
+addex ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000) OV OV32
+addex ffffffffffffffff, 0000001cbe991def => 0000001cbe991def (00000000) OV OV32
+addex ffffffffffffffff, ffffffffffffffff => ffffffffffffffff (00000000) OV OV32
+
+All done. Tested 5 different instructions
ppc one argument plus shift:
Test instruction group [ppc one argument plus shift]
extswsli aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff
@@ -85,7 +95,7 @@ extswsli. aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => aaaaaaaaaaaaaa
extswsli. 5152535455565758 5152535455565758 0 ffaa5599113377cc => 5152535455565758 5152535455565758 0 ffaa5599113377cc
extswsli. 0000000000000000 0000000000000000 0 ffaa5599113377cc => 0000000000000000 0000000000000000 0 ffaa5599113377cc
-All done. Tested 6 different instructions
+All done. Tested 7 different instructions
ppc three parameter ops:
Test instruction group [ppc three parameter ops]
maddhd 0000000000000000, 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
@@ -172,7 +182,7 @@ maddld ffffffffffffffff, ffffffffffffffff, 0000000000000000 => 000000000000000
maddld ffffffffffffffff, ffffffffffffffff, 0000001cbe991def => 0000001cbe991df0 (00000000)
maddld ffffffffffffffff, ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
-All done. Tested 9 different instructions
+All done. Tested 10 different instructions
ppc count zeros:
Test instruction group [ppc count zeros]
cnttzw 0000000000000000 => 0000000000000020
@@ -197,7 +207,7 @@ cnttzd. 0000001cbe991def => 0000000000000000 Expected cr0 to be zero, it is (200
cnttzd. ffffffffffffffff => 0000000000000000 Expected cr0 to be zero, it is (20000000)
-All done. Tested 13 different instructions
+All done. Tested 14 different instructions
ppc set boolean:
Test instruction group [ppc set boolean]
setb cr_field:0 cr_value::00000000 => 0000000000000000
@@ -265,7 +275,7 @@ setb cr_field:7 cr_value::00000005 => 0000000000000001
setb cr_field:7 cr_value::00000006 => 0000000000000001
setb cr_field:7 cr_value::00000007 => 0000000000000001
-All done. Tested 14 different instructions
+All done. Tested 15 different instructions
ppc char compare:
Test instruction group [ppc char compare]
cmprb l=0 0x61 (a) (cmpeq:0x5b427b625a417a61) (cmprb:src22(a-z) src21(A-Z)) => in range/found
@@ -1711,7 +1721,7 @@ cmpeqb 0x5d (]) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
cmpeqb 0x60 (`) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
cmpeqb 0x5f (_) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
-All done. Tested 17 different instructions
+All done. Tested 18 different instructions
ppc vector scalar move to/from:
Test instruction group [ppc vector scalar move to/from]
mfvsrld aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa ffffffffffffffff
@@ -1777,7 +1787,7 @@ mtvsrws aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => 113377cc113377cc
mtvsrws 5152535455565758 5152535455565758 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
mtvsrws 0000000000000000 0000000000000000 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
-All done. Tested 20 different instructions
+All done. Tested 21 different instructions
ppc dfp significance:
Test instruction group [ppc dfp significance]
dtstsfi significance(0x00) +Finite 0 * 10 ^ -12 (GT) (4)
@@ -1862,7 +1872,7 @@ dtstsfiq significance(0x20) -inf (GT) (4)
dtstsfiq significance(0x30) -inf (GT) (4)
dtstsfiq significance(0x3f) -inf (GT) (4)
-All done. Tested 22 different instructions
+All done. Tested 23 different instructions
ppc bcd misc:
Test instruction group [ppc bcd misc]
bcdadd. p0 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000000000000c (+|0) => (EQ) (2) xt:0000000000000000 000000000000000c(+|0)
@@ -33338,12 +33348,12 @@ bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:9999999999999999 99999
bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000001234567d ( - ) => (GT) (4) xt:0000000000000000 000000305419901f(+|0)
-All done. Tested 51 different instructions
+All done. Tested 52 different instructions
ppc noop misc:
Test instruction group [ppc noop misc]
wait =>
-All done. Tested 52 different instructions
+All done. Tested 53 different instructions
ppc addpc_misc:
Test instruction group [ppc addpc_misc]
addpcis 0000000000000000 => 0000000000000000
@@ -33380,7 +33390,7 @@ subpcis 000000000000000d => 0000000000000000
subpcis 000000000000000e => 0000000000000000
subpcis 000000000000000f => 0000000000000000
-All done. Tested 54 different instructions
+All done. Tested 55 different instructions
ppc mffpscr:
Test instruction group [ppc mffpscr]
mffsce => 000000000.000000
@@ -33395,7 +33405,7 @@ mffs => 000000000.000000
fpscr: f14
local_fpscr:
-All done. Tested 57 different instructions
+All done. Tested 58 different instructions
ppc mffpscr:
Test instruction group [ppc mffpscr]
mffscdrni 0 => 0X0
@@ -33426,4 +33436,4 @@ mffscrn f15 0X1 => 0X200000000
mffscrn f15 0X2 => 0X200000000
fpscr: f14 local_fpscr: 30-DRN1 RN-bit62
-All done. Tested 61 different instructions
+All done. Tested 62 different instructions
--
1.8.3.1

View File

@@ -0,0 +1,381 @@
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:36:53 2018 +0100
ppc front end: use new IROps added in 42719898.
This pertains to bug 386945.
VEX/priv/guest_ppc_toIR.c:
gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
gen_vpopcntd_mode32: use Iop_PopCount32.
for cntlz{w,d}, use Iop_CtzNat{32,64}.
gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
verbose_Clz32: remove (was unused anyway).
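Where a single Iop_PopCount{32,64} cannot be used, gen_POPCOUNT still builds the classic mask-and-add reduction in IR. For reference, the same reduction for the full 64-bit case in plain C (a textbook sketch, not Valgrind code; the final multiply stands in for the remaining shift-and-add rounds of the IR tree):

    #include <stdint.h>

    /* Parallel popcount: pairwise sums of 1-, 2- and 4-bit fields, then a
       multiply to accumulate the per-byte counts into the top byte. */
    static unsigned popcount64(uint64_t n)
    {
        n = n - ((n >> 1) & 0x5555555555555555ULL);
        n = (n & 0x3333333333333333ULL) + ((n >> 2) & 0x3333333333333333ULL);
        n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
        return (unsigned)((n * 0x0101010101010101ULL) >> 56);
    }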
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index cb1cae1..8977d4f 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -1595,7 +1595,8 @@ typedef enum {
/* Generate an IR sequence to do a popcount operation on the supplied
IRTemp, and return a new IRTemp holding the result. 'ty' may be
Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
+ _popcount_data_type data_type )
{
/* Do count across 2^data_type bits,
byte: data_type = 3
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
vassert(ty == Ity_I64 || ty == Ity_I32);
+ // Use a single IROp in cases where we can.
+
+ if (ty == Ity_I64 && data_type == DWORD) {
+ IRTemp res = newTemp(Ity_I64);
+ assign(res, unop(Iop_PopCount64, mkexpr(src)));
+ return res;
+ }
+
+ if (ty == Ity_I32 && data_type == WORD) {
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, unop(Iop_PopCount32, mkexpr(src)));
+ return res;
+ }
+
+ // For the rest, we have to do it the slow way.
+
if (ty == Ity_I32) {
for (idx = 0; idx < WORD; idx++) {
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
return nyu;
}
-// else, ty == Ity_I64
+ // else, ty == Ity_I64
vassert(mode64);
for (i = 0; i < DWORD; i++) {
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
*/
static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
{
- Int i, shift[6];
- IRTemp mask[6];
- IRTemp old = IRTemp_INVALID;
- IRTemp nyu1 = IRTemp_INVALID;
- IRTemp nyu2 = IRTemp_INVALID;
IRTemp retval = newTemp(Ity_I64);
vassert(!mode64);
- for (i = 0; i < WORD; i++) {
- mask[i] = newTemp(Ity_I32);
- shift[i] = 1 << i;
- }
- assign(mask[0], mkU32(0x55555555));
- assign(mask[1], mkU32(0x33333333));
- assign(mask[2], mkU32(0x0F0F0F0F));
- assign(mask[3], mkU32(0x00FF00FF));
- assign(mask[4], mkU32(0x0000FFFF));
- old = src1;
- for (i = 0; i < WORD; i++) {
- nyu1 = newTemp(Ity_I32);
- assign(nyu1,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu1;
- }
-
- old = src2;
- for (i = 0; i < WORD; i++) {
- nyu2 = newTemp(Ity_I32);
- assign(nyu2,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu2;
- }
- assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+ assign(retval,
+ unop(Iop_32Uto64,
+ binop(Iop_Add32,
+ unop(Iop_PopCount32, mkexpr(src1)),
+ unop(Iop_PopCount32, mkexpr(src2)))));
return retval;
}
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( result, unop( Iop_Ctz32,
+ assign( result, unop( Iop_CtzNat32,
unop( Iop_64to32, mkexpr( rS ) ) ) );
assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
+ assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
if ( flag_rC == 1 )
set_CR0( mkexpr( rA ) );
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
IRTemp rS = newTemp(ty);
IRTemp rA = newTemp(ty);
IRTemp rB = newTemp(ty);
- IRExpr* irx;
Bool do_rc = False;
assign( rS, getIReg(rS_addr) );
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
break;
case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
- IRExpr* lo32;
if (rB_addr!=0) {
vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
return False;
}
- DIP("cntlzw%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
+ DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
// mode64: count in low word only
- lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
-
- // Iop_Clz32 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE32, lo32, mkU32(0));
- assign(rA, mkWidenFrom32(ty,
- IRExpr_ITE( irx,
- unop(Iop_Clz32, lo32),
- mkU32(32)),
- False));
-
- // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+ IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+ IRExpr* res32 = unop(Iop_ClzNat32, lo32);
+ assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
break;
}
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
return False;
}
- DIP("cntlzd%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
- // Iop_Clz64 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
- assign(rA, IRExpr_ITE( irx,
- unop(Iop_Clz64, mkexpr(rS)),
- mkU64(64) ));
- // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+ DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+ assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
break;
case 0x1FC: // cmpb (Power6: compare bytes)
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
putFReg( rS_addr, mkexpr(frA));
return True;
}
- case 0x1FA: // popcntd (population count doubleword
+ case 0x1FA: // popcntd (population count doubleword)
{
+ vassert(mode64);
DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
putIReg( rA_addr, mkexpr(result) );
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
{
vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
- return
- binop(Iop_Or32,
- binop(Iop_Shl32, mkexpr(t), mkU8(24)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
- mkU32(0x00FF0000)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
- mkU32(0x0000FF00)),
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
- mkU32(0x000000FF) )
- )));
+ return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
}
/* Generates code to swap the byte order in the lower half of an Ity_I32,
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
+ // JRS FIXME:
+ // * is the host_endness conditional below actually necessary?
+ // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
+ // That would be a lot more efficient.
IRExpr * nextAddr;
IRTemp w3 = newTemp( Ity_I32 );
IRTemp w4 = newTemp( Ity_I32 );
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
case 0x7C3: // vpopcntd
{
if (mode64) {
- /* Break vector into 64-bit double words and do the population count
- * on each double word.
+ /* Break vector into 64-bit double words and do the population
+ count on each double word.
*/
IRType ty = Ity_I64;
IRTemp bits0_63 = newTemp(Ity_I64);
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
mkexpr( cnt_bits0_63 ) ) );
} else {
/* Break vector into 32-bit words and do the population count
- * on each doubleword.
+ on each 32-bit word.
*/
IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
- IRTemp cnt_bits0_63 = newTemp(Ity_I64);
+ IRTemp cnt_bits0_63 = newTemp(Ity_I64);
IRTemp cnt_bits64_127 = newTemp(Ity_I64);
DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
- breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+ breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
+ &bits32_63, &bits0_31 );
cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63);
cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
/* Miscellaneous ISA 2.06 instructions */
case 0x1FA: // popcntd
+ if (!mode64) goto decode_failure;
+ /* else fallthru */
case 0x17A: // popcntw
case 0x7A: // popcntb
- if (dis_int_logic( theInstr )) goto decode_success;
- goto decode_failure;
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
case 0x0FC: // bpermd
if (!mode64) goto decode_failure;
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
return dres;
}
-
-/*------------------------------------------------------------*/
-/*--- Unused stuff ---*/
-/*------------------------------------------------------------*/
-
-///* A potentially more memcheck-friendly implementation of Clz32, with
-// the boundary case Clz32(0) = 32, which is what ppc requires. */
-//
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
-//{
-// /* Welcome ... to SSA R Us. */
-// IRTemp n1 = newTemp(Ity_I32);
-// IRTemp n2 = newTemp(Ity_I32);
-// IRTemp n3 = newTemp(Ity_I32);
-// IRTemp n4 = newTemp(Ity_I32);
-// IRTemp n5 = newTemp(Ity_I32);
-// IRTemp n6 = newTemp(Ity_I32);
-// IRTemp n7 = newTemp(Ity_I32);
-// IRTemp n8 = newTemp(Ity_I32);
-// IRTemp n9 = newTemp(Ity_I32);
-// IRTemp n10 = newTemp(Ity_I32);
-// IRTemp n11 = newTemp(Ity_I32);
-// IRTemp n12 = newTemp(Ity_I32);
-//
-// /* First, propagate the most significant 1-bit into all lower
-// positions in the word. */
-// /* unsigned int clz ( unsigned int n )
-// {
-// n |= (n >> 1);
-// n |= (n >> 2);
-// n |= (n >> 4);
-// n |= (n >> 8);
-// n |= (n >> 16);
-// return bitcount(~n);
-// }
-// */
-// assign(n1, mkexpr(arg));
-// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
-// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
-// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
-// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
-// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
-// /* This gives a word of the form 0---01---1. Now invert it, giving
-// a word of the form 1---10---0, then do a population-count idiom
-// (to count the 1s, which is the number of leading zeroes, or 32
-// if the original word was 0. */
-// assign(n7, unop(Iop_Not32, mkexpr(n6)));
-//
-// /* unsigned int bitcount ( unsigned int n )
-// {
-// n = n - ((n >> 1) & 0x55555555);
-// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
-// n = (n + (n >> 4)) & 0x0F0F0F0F;
-// n = n + (n >> 8);
-// n = (n + (n >> 16)) & 0x3F;
-// return n;
-// }
-// */
-// assign(n8,
-// binop(Iop_Sub32,
-// mkexpr(n7),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
-// mkU32(0x55555555))));
-// assign(n9,
-// binop(Iop_Add32,
-// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
-// mkU32(0x33333333))));
-// assign(n10,
-// binop(Iop_And32,
-// binop(Iop_Add32,
-// mkexpr(n9),
-// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
-// mkU32(0x0F0F0F0F)));
-// assign(n11,
-// binop(Iop_Add32,
-// mkexpr(n10),
-// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
-// assign(n12,
-// binop(Iop_Add32,
-// mkexpr(n11),
-// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
-// return
-// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
-//}
-
/*--------------------------------------------------------------------*/
/*--- end guest_ppc_toIR.c ---*/
/*--------------------------------------------------------------------*/

View File

@@ -0,0 +1,257 @@
commit 97d336b79e36f6c99d8b07f49ebc9b780e6df84e
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:07:37 2018 +0100
Add ppc host-side isel and instruction support for IROps added in previous commit.
VEX/priv/host_ppc_defs.c, VEX/priv/host_ppc_defs.h:
Don't emit cnttz{w,d}. They may be needed on a target which doesn't support
them. Instead we can generate a fairly reasonable alternative sequence with
cntlz{w,d}.
Add support for emitting popcnt{w,d}.
VEX/priv/host_ppc_isel.c
Add support for: Iop_ClzNat32 Iop_ClzNat64
Redo support for: Iop_Ctz{32,64} and their Nat equivalents, so as to not use
cnttz{w,d}, as mentioned above.
Add support for: Iop_PopCount64 Iop_PopCount32 Iop_Reverse8sIn32_x1
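The Ctz lowering relies on the identity ctz(x) = WS - clz(~x & (x - 1)), which also delivers the required boundary case ctz(0) = WS, because cntlz{w,d} return WS for a zero input. A quick plain-C check of the identity (illustrative only; uses GCC builtins):

    #include <stdint.h>
    #include <assert.h>

    /* clz with the ppc boundary behaviour: cntlzw returns 32 for input 0. */
    static unsigned clz32(uint32_t x) { return x ? (unsigned)__builtin_clz(x) : 32; }

    /* ctz via clz, the same computation the isel emits. */
    static unsigned ctz32(uint32_t x) { return 32 - clz32(~x & (x - 1)); }

    int main(void)
    {
        assert(ctz32(0) == 32);  /* ~0 & (0-1) is all ones, clz = 0 */
        assert(ctz32(1) == 0);   /* ~1 & 0 is 0, clz = 32          */
        assert(ctz32(8) == 3);
        return 0;
    }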
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index b073c1d..f4b52e4 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -501,9 +501,9 @@ const HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
case Pun_NEG: return "neg";
case Pun_CLZ32: return "cntlzw";
case Pun_CLZ64: return "cntlzd";
- case Pun_CTZ32: return "cnttzw";
- case Pun_CTZ64: return "cnttzd";
case Pun_EXTSW: return "extsw";
+ case Pun_POP32: return "popcntw";
+ case Pun_POP64: return "popcntd";
default: vpanic("showPPCUnaryOp");
}
}
@@ -4265,20 +4265,19 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
vassert(mode64);
p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
break;
- case Pun_CTZ32: // cnttzw r_dst, r_src
- /* Note oder of src and dst is backwards from normal */
- p = mkFormX(p, 31, r_src, r_dst, 0, 538, 0, endness_host);
- break;
- case Pun_CTZ64: // cnttzd r_dst, r_src
- /* Note oder of src and dst is backwards from normal */
- vassert(mode64);
- p = mkFormX(p, 31, r_src, r_dst, 0, 570, 0, endness_host);
- break;
case Pun_EXTSW: // extsw r_dst, r_src
vassert(mode64);
p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
break;
- default: goto bad;
+ case Pun_POP32: // popcntw r_dst, r_src
+ p = mkFormX(p, 31, r_src, r_dst, 0, 378, 0, endness_host);
+ break;
+ case Pun_POP64: // popcntd r_dst, r_src
+ vassert(mode64);
+ p = mkFormX(p, 31, r_src, r_dst, 0, 506, 0, endness_host);
+ break;
+ default:
+ goto bad;
}
goto done;
}
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 17baff5..321fba9 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -291,9 +291,9 @@ typedef
Pun_NOT,
Pun_CLZ32,
Pun_CLZ64,
- Pun_CTZ32,
- Pun_CTZ64,
- Pun_EXTSW
+ Pun_EXTSW,
+ Pun_POP32, // popcntw
+ Pun_POP64 // popcntd
}
PPCUnaryOp;
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 6bdb5f7..5242176 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2065,12 +2065,15 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return r_dst;
}
break;
- case Iop_Clz32:
- case Iop_Clz64: {
+
+ case Iop_Clz32: case Iop_ClzNat32:
+ case Iop_Clz64: case Iop_ClzNat64: {
+ // cntlz is available even in the most basic (earliest) ppc
+ // variants, so it's safe to generate it unconditionally.
HReg r_src, r_dst;
- PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
- Pun_CLZ64;
- if (op_unop == Iop_Clz64 && !mode64)
+ PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
+ ? Pun_CLZ32 : Pun_CLZ64;
+ if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
goto irreducible;
/* Count leading zeroes. */
r_dst = newVRegI(env);
@@ -2079,18 +2082,133 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return r_dst;
}
- case Iop_Ctz32:
- case Iop_Ctz64: {
- HReg r_src, r_dst;
- PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
- Pun_CTZ64;
- if (op_unop == Iop_Ctz64 && !mode64)
- goto irreducible;
- /* Count trailing zeroes. */
- r_dst = newVRegI(env);
- r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
- addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
- return r_dst;
+ //case Iop_Ctz32:
+ case Iop_CtzNat32:
+ //case Iop_Ctz64:
+ case Iop_CtzNat64:
+ {
+ // Generate code using Clz, because we can't assume the host has
+ // Ctz. In particular, part of the fix for bug 386945 involves
+ // creating a Ctz in ir_opt.c from smaller fragments.
+ PPCUnaryOp op_clz = Pun_CLZ64;
+ Int WS = 64;
+ if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
+ op_clz = Pun_CLZ32;
+ WS = 32;
+ }
+ /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
+ t1 = arg - 1
+ t2 = not arg
+ t2 = t2 & t1
+ t2 = clz t2
+ t1 = WS
+ t2 = t1 - t2
+ // result in t2
+ */
+ HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg t1 = newVRegI(env);
+ HReg t2 = newVRegI(env);
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
+ addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
+ addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
+ addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
+ return t2;
+ }
+
+ case Iop_PopCount64: {
+ // popcnt{w,d} is only available in later arch revs (ISA 3.0,
+ // maybe) so it's not really correct to emit it here without a caps
+ // check for the host.
+ if (mode64) {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
+ return r_dst;
+ }
+ // We don't expect to be required to handle this in 32-bit mode.
+ break;
+ }
+
+ case Iop_PopCount32: {
+ // Similar comment as for Ctz just above applies -- we really
+ // should have a caps check here.
+
+ HReg r_dst = newVRegI(env);
+ // This actually generates popcntw, which in 64 bit mode does a
+ // 32-bit count individually for both low and high halves of the
+ // word. Per the comment at the top of iselIntExpr_R, in the 64
+ // bit mode case, the user of this result is required to ignore
+ // the upper 32 bits of the result. In 32 bit mode this is all
+ // moot. It is, however, unclear from the PowerISA 3.0 docs whether
+ // the instruction exists in 32 bit mode; our own front
+ // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
+ return r_dst;
+ }
+
+ case Iop_Reverse8sIn32_x1: {
+ // A bit of a mouthful, but simply .. 32-bit byte swap.
+ // This is pretty rubbish code. We could do vastly better if
+ // rotates, and better, rotate-inserts, were allowed. Note that
+ // even on a 64 bit target, the right shifts must be done as 32-bit
+ // so as to introduce zero bits in the right places. So it seems
+ // simplest to do the whole sequence in 32-bit insns.
+ /*
+ r = <argument> // working temporary, initial byte order ABCD
+ Mask = 00FF00FF
+ nMask = not Mask
+ tHi = and r, Mask
+ tHi = shl tHi, 8
+ tLo = and r, nMask
+ tLo = shr tLo, 8
+ r = or tHi, tLo // now r has order BADC
+ and repeat for 16 bit chunks ..
+ Mask = 0000FFFF
+ nMask = not Mask
+ tHi = and r, Mask
+ tHi = shl tHi, 16
+ tLo = and r, nMask
+ tLo = shr tLo, 16
+ r = or tHi, tLo // now r has order DCBA
+ */
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg rr = newVRegI(env);
+ HReg rMask = newVRegI(env);
+ HReg rnMask = newVRegI(env);
+ HReg rtHi = newVRegI(env);
+ HReg rtLo = newVRegI(env);
+ // Copy r_src since we need to modify it
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
+ // Swap within 16-bit lanes
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
+ False/* !64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+ // And now swap the two 16-bit chunks
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
+ False/* !64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+ return rr;
}
case Iop_Left8:

View File

@@ -0,0 +1,130 @@
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 7 10:42:22 2018 -0500
Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
This makes it possible for memcheck to analyse the new gcc strcmp
inlined code correctly even if the ldbrx load is partly beyond an
addressable block.
Partially resolves bug 386945.
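Iop_Reverse8sIn64_x1 is a full 64-bit byte reversal, so ldbrx becomes one wide load that memcheck can track, followed by the swap. The lowering added below performs three mask/shift/or rounds; in plain C the same computation is (a sketch, equivalent to GCC's __builtin_bswap64):

    #include <stdint.h>

    /* Byte-reverse a 64-bit value: swap bytes within halfwords, then
       halfwords within words, then the two word halves. */
    static uint64_t reverse8s_in64(uint64_t r)
    {
        r = ((r & 0x00FF00FF00FF00FFULL) << 8)  | ((r >> 8)  & 0x00FF00FF00FF00FFULL);
        r = ((r & 0x0000FFFF0000FFFFULL) << 16) | ((r >> 16) & 0x0000FFFF0000FFFFULL);
        r = (r << 32) | (r >> 32);
        return r;
    }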
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8977d4f..a81dace 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
- // JRS FIXME:
- // * is the host_endness conditional below actually necessary?
- // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
- // That would be a lot more efficient.
- IRExpr * nextAddr;
- IRTemp w3 = newTemp( Ity_I32 );
- IRTemp w4 = newTemp( Ity_I32 );
- DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
- assign( w1, load( Ity_I32, mkexpr( EA ) ) );
- assign( w2, gen_byterev32( w1 ) );
- nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
- assign( w3, load( Ity_I32, nextAddr ) );
- assign( w4, gen_byterev32( w3 ) );
- if (host_endness == VexEndnessLE)
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+ /* Caller makes sure we are only called in mode64. */
+
+ /* If we supported swapping LE/BE loads in the backend then we could
+ just load the value with the bytes reversed by doing a BE load
+ on an LE machine and a LE load on a BE machine.
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ if (host_endness == VexEndnessBE)
+ assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
else
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+ assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
+ putIReg( rD_addr, mkexpr(dw1) );
+
+ But since we currently don't, we load the value as is and then
+ switch it around with Iop_Reverse8sIn64_x1. */
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ IRTemp dw2 = newTemp(Ity_I64);
+ DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ assign( dw1, load(Ity_I64, mkexpr(EA)) );
+ assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
+ putIReg( rD_addr, mkexpr(dw2) );
break;
}
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 750cf8d..4fc3eb5 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return rr;
}
+ case Iop_Reverse8sIn64_x1: {
+ /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
+ Can only be used in 64bit mode. */
+ vassert (mode64);
+
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg rr = newVRegI(env);
+ HReg rMask = newVRegI(env);
+ HReg rnMask = newVRegI(env);
+ HReg rtHi = newVRegI(env);
+ HReg rtLo = newVRegI(env);
+
+ // Copy r_src since we need to modify it
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
+
+ // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
+ True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+ // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+ // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
+ /* We don't need to mask anymore, just two more shifts and an or. */
+ addInstr(env, mk_iMOVds_RR(rtLo, rr));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rr, rr,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
+
+ return rr;
+ }
+
case Iop_Left8:
case Iop_Left16:
case Iop_Left32:

View File

@@ -0,0 +1,88 @@
commit 5c00e04a1b61475a7f731f8cfede114201815e0a
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 23:25:05 2018 +0100
Implement ppc64 lxvb16x as 128-bit vector load with reversed double words.
This makes it possible for memcheck to know which part of the 128bit
vector is defined, even if the load is partly beyond an addressable block.
Partially resolves bug 386945.
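In other words, lxvb16x must place the byte at EA+0 in the most significant byte of the vector whatever the host endianness. A plain-C model of the translation below (illustrative only; __BYTE_ORDER__ is a GCC/Clang predefine):

    #include <stdint.h>
    #include <string.h>

    /* Model: 128-bit host load, bytes reversed per doubleword, and on an
       LE host the doublewords change places as part of the same fixup. */
    static void lxvb16x_model(const uint8_t *ea, uint64_t *ms_dw, uint64_t *ls_dw)
    {
        uint64_t a, b;
        memcpy(&a, ea,     8);
        memcpy(&b, ea + 8, 8);
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        a = __builtin_bswap64(a);   /* undo the LE byte order */
        b = __builtin_bswap64(b);
    #endif
        *ms_dw = a;   /* bytes EA[0..7],  EA[0] most significant  */
        *ls_dw = b;   /* bytes EA[8..15], EA[8] most significant  */
    }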
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 7af4973..ec2f90a 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -20702,54 +20702,29 @@ dis_vx_load ( UInt theInstr )
{
DIP("lxvb16x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
- IRTemp byte[16];
- int i;
- UInt ea_off = 0;
- IRExpr* irx_addr;
- IRTemp tmp_low[9];
- IRTemp tmp_hi[9];
+ /* The result of lxvb16x should be the same on big and little
+ endian systems. We do a host load, then reverse the bytes in
+ the double words. If the host load was little endian we swap
+ them around again. */
- tmp_low[0] = newTemp( Ity_I64 );
- tmp_hi[0] = newTemp( Ity_I64 );
- assign( tmp_low[0], mkU64( 0 ) );
- assign( tmp_hi[0], mkU64( 0 ) );
-
- for ( i = 0; i < 8; i++ ) {
- byte[i] = newTemp( Ity_I64 );
- tmp_low[i+1] = newTemp( Ity_I64 );
-
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
- ea_off += 1;
-
- assign( byte[i], binop( Iop_Shl64,
- unop( Iop_8Uto64,
- load( Ity_I8, irx_addr ) ),
- mkU8( 8 * ( 7 - i ) ) ) );
+ IRTemp high = newTemp(Ity_I64);
+ IRTemp high_rev = newTemp(Ity_I64);
+ IRTemp low = newTemp(Ity_I64);
+ IRTemp low_rev = newTemp(Ity_I64);
- assign( tmp_low[i+1],
- binop( Iop_Or64,
- mkexpr( byte[i] ), mkexpr( tmp_low[i] ) ) );
- }
+ IRExpr *t128 = load( Ity_V128, mkexpr( EA ) );
- for ( i = 0; i < 8; i++ ) {
- byte[i + 8] = newTemp( Ity_I64 );
- tmp_hi[i+1] = newTemp( Ity_I64 );
+ assign( high, unop(Iop_V128HIto64, t128) );
+ assign( high_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(high)) );
+ assign( low, unop(Iop_V128to64, t128) );
+ assign( low_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(low)) );
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
- ea_off += 1;
+ if (host_endness == VexEndnessLE)
+ t128 = binop( Iop_64HLtoV128, mkexpr (low_rev), mkexpr (high_rev) );
+ else
+ t128 = binop( Iop_64HLtoV128, mkexpr (high_rev), mkexpr (low_rev) );
- assign( byte[i+8], binop( Iop_Shl64,
- unop( Iop_8Uto64,
- load( Ity_I8, irx_addr ) ),
- mkU8( 8 * ( 7 - i ) ) ) );
- assign( tmp_hi[i+1], binop( Iop_Or64,
- mkexpr( byte[i+8] ),
- mkexpr( tmp_hi[i] ) ) );
- }
- putVSReg( XT, binop( Iop_64HLtoV128,
- mkexpr( tmp_low[8] ), mkexpr( tmp_hi[8] ) ) );
+ putVSReg( XT, t128 );
break;
}

View File

@@ -0,0 +1,47 @@
commit b7d65cab4f3e9a6f66a496e723e53ed736c4d2e7
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 00:55:42 2018 +0100
Implement ppc64 lxvd2x as 128-bit load with double word swap for ppc64le.
This makes it possible for memcheck to know which part of the 128bit
vector is defined, even if the load is partly beyond an addressable block.
Partially resolves bug 386945.
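On an LE host the single 128-bit load delivers the doubleword at EA in the low half of the vector, whereas the architecture wants it in the high half, hence the swap. A plain-C sketch of the LE case (illustrative names, not Valgrind code):

    #include <stdint.h>
    #include <string.h>

    /* LE-host model of the new lxvd2x translation. */
    static void lxvd2x_model_le(const uint8_t *ea, uint64_t *vsr_hi, uint64_t *vsr_lo)
    {
        uint64_t load_lo, load_hi;
        memcpy(&load_lo, ea,     8);   /* low  half of the LE V128 load */
        memcpy(&load_hi, ea + 8, 8);   /* high half of the LE V128 load */
        *vsr_hi = load_lo;             /* swap: doubleword at EA on top */
        *vsr_lo = load_hi;
    }

The point is that memcheck now sees a single 16-byte load rather than two 8-byte loads, so definedness tracking works even when the access straddles the end of an addressable block.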
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index a81dace..7af4973 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -20590,16 +20590,22 @@ dis_vx_load ( UInt theInstr )
}
case 0x34C: // lxvd2x
{
- IROp addOp = ty == Ity_I64 ? Iop_Add64 : Iop_Add32;
- IRExpr * high, *low;
- ULong ea_off = 8;
- IRExpr* high_addr;
+ IRExpr *t128;
DIP("lxvd2x %d,r%u,r%u\n", XT, rA_addr, rB_addr);
- high = load( Ity_I64, mkexpr( EA ) );
- high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
- : mkU32( ea_off ) );
- low = load( Ity_I64, high_addr );
- putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
+ t128 = load( Ity_V128, mkexpr( EA ) );
+
+ /* The data in the vec register should be in big endian order.
+ So if we just did a little endian load then swap around the
+ high and low double words. */
+ if (host_endness == VexEndnessLE) {
+ IRTemp high = newTemp(Ity_I64);
+ IRTemp low = newTemp(Ity_I64);
+ assign( high, unop(Iop_V128HIto64, t128) );
+ assign( low, unop(Iop_V128to64, t128) );
+ t128 = binop( Iop_64HLtoV128, mkexpr (low), mkexpr (high) );
+ }
+
+ putVSReg( XT, t128 );
break;
}
case 0x14C: // lxvdsx

View File

@@ -0,0 +1,111 @@
commit 3967a99c26e8b314634a6b1fd8927cbb2bb5d060
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 14:11:29 2018 +0100
Implement minimal ptrace support for ppc64[le]-linux.
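For GETREGSET/SETREGSET the tracer passes a register-set id (e.g. NT_PRSTATUS) in ptrace's addr slot and a struct iovec in the data slot; that is why the wrappers below check ARG4 but never dereference ARG3. A user-side sketch of the call shape (illustrative; the register buffer type varies by architecture):

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <elf.h>        /* NT_PRSTATUS */

    /* Fetch the tracee's general-purpose register set. */
    static long fetch_gprs(pid_t pid, void *regs_buf, size_t regs_size)
    {
        struct iovec iov = { .iov_base = regs_buf, .iov_len = regs_size };
        return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov);
    }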
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
index 6549dd1..0fdcc8e 100644
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -388,6 +388,7 @@ DECL_TEMPLATE(ppc64_linux, sys_mmap);
//zz DECL_TEMPLATE(ppc64_linux, sys_sigreturn);
DECL_TEMPLATE(ppc64_linux, sys_rt_sigreturn);
DECL_TEMPLATE(ppc64_linux, sys_fadvise64);
+DECL_TEMPLATE(ppc64_linux, sys_ptrace);
PRE(sys_mmap)
{
@@ -511,6 +512,72 @@ PRE(sys_rt_sigreturn)
*flags |= SfPollAfter;
}
+// ARG3 is only used for pointers into the traced process's address
+// space and for offsets into the traced process's struct
+// user_regs_struct. It is never a pointer into this process's memory
+// space, and we should therefore not check anything it points to.
+// powerpc does have other ways to get/set registers; we only support
+// GET/SETREGSET for now.
+PRE(sys_ptrace)
+{
+ PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
+ PRE_REG_READ4(int, "ptrace",
+ long, request, long, pid, long, addr, long, data);
+ switch (ARG1) {
+ case VKI_PTRACE_PEEKTEXT:
+ case VKI_PTRACE_PEEKDATA:
+ case VKI_PTRACE_PEEKUSR:
+ PRE_MEM_WRITE( "ptrace(peek)", ARG4,
+ sizeof (long));
+ break;
+ case VKI_PTRACE_GETEVENTMSG:
+ PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
+ break;
+ case VKI_PTRACE_GETSIGINFO:
+ PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_SETSIGINFO:
+ PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_GETREGSET:
+ ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
+ break;
+ case VKI_PTRACE_SETREGSET:
+ ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
+ break;
+ default:
+ break;
+ }
+}
+
+POST(sys_ptrace)
+{
+ switch (ARG1) {
+ case VKI_PTRACE_TRACEME:
+ ML_(linux_POST_traceme)(tid);
+ break;
+ case VKI_PTRACE_PEEKTEXT:
+ case VKI_PTRACE_PEEKDATA:
+ case VKI_PTRACE_PEEKUSR:
+ POST_MEM_WRITE( ARG4, sizeof (long));
+ break;
+ case VKI_PTRACE_GETEVENTMSG:
+ POST_MEM_WRITE( ARG4, sizeof(unsigned long));
+ break;
+ case VKI_PTRACE_GETSIGINFO:
+ /* XXX: This is a simplification. Different parts of the
+ * siginfo_t are valid depending on the type of signal.
+ */
+ POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_GETREGSET:
+ ML_(linux_POST_getregset)(tid, ARG3, ARG4);
+ break;
+ default:
+ break;
+ }
+}
+
#undef PRE
#undef POST
@@ -562,8 +629,7 @@ static SyscallTableEntry syscall_table[] = {
GENX_(__NR_getuid, sys_getuid), // 24
// _____(__NR_stime, sys_stime), // 25
-// When ptrace is supported, memcheck/tests/linux/getregset should be enabled
-// _____(__NR_ptrace, sys_ptrace), // 26
+ PLAXY(__NR_ptrace, sys_ptrace), // 26
GENX_(__NR_alarm, sys_alarm), // 27
// _____(__NR_oldfstat, sys_oldfstat), // 28
GENX_(__NR_pause, sys_pause), // 29
diff --git a/memcheck/tests/linux/getregset.vgtest b/memcheck/tests/linux/getregset.vgtest
index 4c66108..c35be4c 100644
--- a/memcheck/tests/linux/getregset.vgtest
+++ b/memcheck/tests/linux/getregset.vgtest
@@ -1,4 +1,4 @@
prog: getregset
vgopts: -q
-prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 ) && ! ../../../tests/arch_test ppc64
+prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 )

View File

@@ -0,0 +1,28 @@
commit 321771ee63740333ad355244e0764295218843b8
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 14:26:39 2018 +0100
memcheck: Allow unaligned loads of 128bit vectors on ppc64[le].
On powerpc partial unaligned loads of vectors from partially invalid
addresses are OK and could be generated by our translation of lxvd2x.
Adjust partial_load memcheck tests to allow partial loads of 16 byte
vectors on powerpc64.
Part of resolving bug #386945.
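Condensed, the slow-path policy in mc_LOADV_128_or_256_slow after this change reads roughly as follows (a sketch of the #ifdef ladder, not the verbatim source):

    /* May a wide vector load touching unaddressable bytes still be
       serviced byte-by-byte rather than treated as a fatal access? */
    static int aligned_ok(unsigned long a, unsigned szB)
    {
    #if defined(VGA_s390x)
        return (a & 0xfff) <= 0x1000 - szB;   /* all bytes in one page   */
    #elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
        return szB == 16;                     /* lxvd2x may be unaligned */
    #else
        return (a & (szB - 1)) == 0;          /* natural alignment       */
    #endif
    }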
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 737f79d..101916b 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1354,6 +1354,9 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
/* OK if all loaded bytes are from the same page. */
Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
+ /* lxvd2x might generate an unaligned 128 bit vector load. */
+ Bool alignedOK = (szB == 16);
# else
/* OK if the address is aligned by the load size. */
Bool alignedOK = (0 == (a & (szB - 1)));

View File

@@ -0,0 +1,148 @@
commit c5a5bea00af75f6ac50da10967d956f117b956f1
Author: Mark Wielaard <mark@klomp.org>
Date: Sat Dec 8 13:47:43 2018 -0500
memcheck: Allow unaligned loads of words on ppc64[le].
On powerpc partial unaligned loads of words from partially invalid
addresses are OK and could be generated by our translation of ldbrx.
Adjust partial_load memcheck tests to allow partial loads of words
on powerpc64.
Part of resolving bug #386945.
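The corresponding word-sized rule in mc_LOADVn_slow drops the alignment requirement on ppc64, keeping only the demand that at least one byte be addressable. Roughly (again a sketch, with sizeof(long) standing in for VG_WORDSIZE):

    /* Is a partially-invalid word load exempt from an addressing error? */
    static int word_load_exemptible(unsigned long a, unsigned szB,
                                    unsigned n_addrs_bad)
    {
    #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
        /* On power, unaligned partial word loads are OK. */
        return szB == sizeof(long) && n_addrs_bad < sizeof(long);
    #else
        return szB == sizeof(long) && (a & (sizeof(long) - 1)) == 0
               && n_addrs_bad < sizeof(long);
    #endif
    }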
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 3ef7cb9..737f79d 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1508,6 +1508,9 @@ ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
# if defined(VGA_mips64) && defined(VGABI_N32)
if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
&& n_addrs_bad < VG_WORDSIZE * 2)
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
+ /* On power unaligned loads of words are OK. */
+ if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
# else
if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
&& n_addrs_bad < VG_WORDSIZE)
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 2af4dd1..70b8ada 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -235,8 +235,10 @@ EXTRA_DIST = \
partiallydefinedeq.stdout.exp \
partial_load_ok.vgtest partial_load_ok.stderr.exp \
partial_load_ok.stderr.exp64 \
+ partial_load_ok.stderr.exp-ppc64 \
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
partial_load_dflt.stderr.exp64 \
+ partial_load_dflt.stderr.exp-ppc64 \
partial_load_dflt.stderr.expr-s390x-mvc \
pdb-realloc.stderr.exp pdb-realloc.vgtest \
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \
diff --git a/memcheck/tests/partial_load.c b/memcheck/tests/partial_load.c
index 0b2f10b..685ca8d 100644
--- a/memcheck/tests/partial_load.c
+++ b/memcheck/tests/partial_load.c
@@ -1,14 +1,14 @@
-
+#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
int main ( void )
{
- long w;
- int i;
- char* p;
-
+ long w; int i; char* p;
assert(sizeof(long) == sizeof(void*));
+#if defined(__powerpc64__)
+ fprintf (stderr, "powerpc64\n"); /* Used to select correct .exp file. */
+#endif
/* partial load, which --partial-loads-ok=yes should suppress */
p = calloc( sizeof(long)-1, 1 );
@@ -16,7 +16,7 @@ int main ( void )
w = *(long*)p;
free(p);
- /* partial but misaligned, cannot be suppressed */
+ /* partial but misaligned; ok on ppc64[le], but otherwise cannot be suppressed */
p = calloc( sizeof(long), 1 );
assert(p);
p++;
diff --git a/memcheck/tests/partial_load_dflt.stderr.exp-ppc64 b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
new file mode 100644
index 0000000..cf32bcf
--- /dev/null
+++ b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
@@ -0,0 +1,23 @@
+
+powerpc64
+Invalid read of size 2
+ at 0x........: main (partial_load.c:30)
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
+ at 0x........: calloc (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:28)
+
+Invalid read of size 8
+ at 0x........: main (partial_load.c:37)
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
+ at 0x........: free (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:36)
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/memcheck/tests/partial_load_ok.stderr.exp-ppc64 b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
new file mode 100644
index 0000000..cf32bcf
--- /dev/null
+++ b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
@@ -0,0 +1,23 @@
+
+powerpc64
+Invalid read of size 2
+ at 0x........: main (partial_load.c:30)
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
+ at 0x........: calloc (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:28)
+
+Invalid read of size 8
+ at 0x........: main (partial_load.c:37)
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
+ at 0x........: free (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:36)
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-12 23:17:07.525501080 +0100
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-12 23:18:13.404014757 +0100
@@ -1546,8 +1546,10 @@
partiallydefinedeq.stdout.exp \
partial_load_ok.vgtest partial_load_ok.stderr.exp \
partial_load_ok.stderr.exp64 \
+ partial_load_ok.stderr.exp-ppc64 \
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
partial_load_dflt.stderr.exp64 \
+ partial_load_dflt.stderr.exp-ppc64 \
partial_load_dflt.stderr.expr-s390x-mvc \
pdb-realloc.stderr.exp pdb-realloc.vgtest \
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \

View File

@@ -0,0 +1,84 @@
commit 71002d8a5111d02ce8049c55017a8d948c820e35
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu Oct 25 13:47:12 2018 +0200
Bug 400490 s390x: Fix register allocation for VRs vs FPRs
On s390x, if vector registers are available, they are fed to the register
allocator as if they were separate from the floating-point registers. But
in fact the FPRs are embedded in the VRs. So for instance, if both f3 and
v3 are allocated and used at the same time, corruption will result.
This is fixed by offering only the non-overlapping VRs, v16 to v31, to the
register allocator instead.
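The overlap can be pictured as a union: each FPR fN occupies the leftmost eight bytes of the same-numbered VR vN, so only v16 to v31 have no FPR alias. An illustrative model (not Valgrind's actual representation):

    #include <stdint.h>

    /* s390x: fN aliases bytes 0..7 of vN, so handing the allocator both
       f3 and v3 as independent registers corrupts whichever is written
       second.  v16..v31 carry no FPR alias and are safe to hand out. */
    typedef union {
        uint8_t bytes[16];   /* the full vector register vN  */
        double  fpr;         /* fN overlays bytes 0..7 of vN */
    } S390VReg;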
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index 6c22ac8..98ac938 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -59,7 +59,6 @@ static UInt s390_tchain_load64_len(void);
/* A mapping from register number to register index */
static Int gpr_index[16]; // GPR regno -> register index
-static Int fpr_index[16]; // FPR regno -> register index
static Int vr_index[32]; // VR regno -> register index
HReg
@@ -73,7 +72,7 @@ s390_hreg_gpr(UInt regno)
HReg
s390_hreg_fpr(UInt regno)
{
- Int ix = fpr_index[regno];
+ Int ix = vr_index[regno];
vassert(ix >= 0);
return mkHReg(/*virtual*/False, HRcFlt64, regno, ix);
}
@@ -463,11 +462,9 @@ getRRegUniverse_S390(void)
RRegUniverse__init(ru);
- /* Assign invalid values to the gpr/fpr/vr_index */
+ /* Assign invalid values to the gpr/vr_index */
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
gpr_index[i] = -1;
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
- fpr_index[i] = -1;
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
vr_index[i] = -1;
@@ -494,17 +491,17 @@ getRRegUniverse_S390(void)
ru->allocable_start[HRcFlt64] = ru->size;
for (UInt regno = 8; regno <= 15; ++regno) {
- fpr_index[regno] = ru->size;
+ vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
for (UInt regno = 0; regno <= 7; ++regno) {
- fpr_index[regno] = ru->size;
+ vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable_start[HRcVec128] = ru->size;
- for (UInt regno = 0; regno <= 31; ++regno) {
+ for (UInt regno = 16; regno <= 31; ++regno) {
vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_vr(regno);
}
@@ -527,12 +524,12 @@ getRRegUniverse_S390(void)
/* Sanity checking */
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
vassert(gpr_index[i] >= 0);
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
- vassert(fpr_index[i] >= 0);
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
vassert(vr_index[i] >= 0);
initialised = True;
+
+ RRegUniverse__check_is_sane(ru);
return ru;
}

View File

@@ -0,0 +1,41 @@
commit 9545e9f96beda6e9f2205bdb3c3e96edaf8d9e2b
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue Oct 30 17:06:38 2018 +0100
Bug 400491 s390x: Sign-extend immediate operand of LOCHI and friends
The VEX implementation of each of the z/Architecture instructions LOCHI,
LOCHHI, and LOCGHI treats the immediate 16-bit operand as an unsigned
integer instead of a signed integer. This is fixed.
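The cast chain (UInt)(Int)(Short)i2 reinterprets the 16-bit immediate as signed and then sign-extends it to 32 bits; for the 64-bit LOCGHI the analogous chain must extend through Long/ULong. A minimal check of the difference (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t i2 = 0xFFFF;                                  /* immediate -1 */
        assert((uint32_t)i2 == 0x0000FFFFu);                   /* old, wrong   */
        assert((uint32_t)(int32_t)(int16_t)i2 == 0xFFFFFFFFu); /* fixed        */
        assert((uint64_t)(int64_t)(int16_t)i2
               == 0xFFFFFFFFFFFFFFFFull);                      /* LOCGHI case  */
        return 0;
    }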
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index 60b6081..9c4d79b 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -16307,7 +16307,7 @@ static const HChar *
s390_irgen_LOCHHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_w0(r1, mkU32(i2));
+ put_gpr_w0(r1, mkU32((UInt)(Int)(Short)i2));
return "lochhi";
}
@@ -16316,7 +16316,7 @@ static const HChar *
s390_irgen_LOCHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_w1(r1, mkU32(i2));
+ put_gpr_w1(r1, mkU32((UInt)(Int)(Short)i2));
return "lochi";
}
@@ -16325,7 +16325,7 @@ static const HChar *
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_dw0(r1, mkU64(i2));
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
return "locghi";
}

View File

@@ -0,0 +1,32 @@
commit 467c7c4c9665c0f8b41a4416722a027ebc05df2b
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Mon Jan 21 14:10:00 2019 +0100
Bug 403552 s390x: Fix vector facility bit number
The wrong bit number was used when checking for the vector facility. This
can result in a fatal emulation error: "Encountered an instruction that
requires the vector facility. That facility is not available on this
host."
In many cases the wrong facility bit was set as well, hence
nothing bad happened. But when running Valgrind within a Qemu/KVM guest,
the wrong bit was not (always?) set and the emulation error occurred.
This fix simply corrects the vector facility bit number, changing it from
128 to 129.
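STFLE facility bits are numbered from the most significant bit of the first result doubleword onward, so bit 129 sits near the top of the third doubleword. A hedged sketch of the test (the helper name is illustrative):

    #include <stdint.h>

    /* Test STFLE facility bit n: doubleword n/64, counting bits from the
       MSB, so bit 129 is bit position 1 of stfle_words[2]. */
    static int facility_present(const uint64_t *stfle_words, unsigned n)
    {
        return (int)((stfle_words[n / 64] >> (63 - (n % 64))) & 1);
    }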
diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h
index a8a66b96b..8723ee21d 100644
--- a/VEX/pub/libvex_s390x_common.h
+++ b/VEX/pub/libvex_s390x_common.h
@@ -103,7 +103,7 @@
#define S390_FAC_MSA5 57 // message-security-assist 5
#define S390_FAC_TREXE 73 // transactional execution
#define S390_FAC_MSA4 77 // message-security-assist 4
-#define S390_FAC_VX 128 // vector facility
+#define S390_FAC_VX 129 // vector facility
/*--------------------------------------------------------------*/

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,408 @@
commit 50bd2282bce101012a5668b670cb185375600d2d
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu Oct 18 17:51:57 2018 +0200
Bug 397187 s390x: Add vector register support for vgdb
On s390x machines with a vector facility, Valgrind's gdbserver didn't
represent the vector registers. This is fixed.
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 8de1996..94030fd 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -685,6 +685,11 @@ GDBSERVER_XML_FILES = \
m_gdbserver/s390x-linux64-valgrind-s1.xml \
m_gdbserver/s390x-linux64-valgrind-s2.xml \
m_gdbserver/s390x-linux64.xml \
+ m_gdbserver/s390-vx-valgrind-s1.xml \
+ m_gdbserver/s390-vx-valgrind-s2.xml \
+ m_gdbserver/s390-vx.xml \
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
+ m_gdbserver/s390x-vx-linux.xml \
m_gdbserver/mips-cp0-valgrind-s1.xml \
m_gdbserver/mips-cp0-valgrind-s2.xml \
m_gdbserver/mips-cp0.xml \
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
new file mode 100644
index 0000000..ca461b3
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx-valgrind-s1">
+ <reg name="v0ls1" bitsize="64" type="uint64"/>
+ <reg name="v1ls1" bitsize="64" type="uint64"/>
+ <reg name="v2ls1" bitsize="64" type="uint64"/>
+ <reg name="v3ls1" bitsize="64" type="uint64"/>
+ <reg name="v4ls1" bitsize="64" type="uint64"/>
+ <reg name="v5ls1" bitsize="64" type="uint64"/>
+ <reg name="v6ls1" bitsize="64" type="uint64"/>
+ <reg name="v7ls1" bitsize="64" type="uint64"/>
+ <reg name="v8ls1" bitsize="64" type="uint64"/>
+ <reg name="v9ls1" bitsize="64" type="uint64"/>
+ <reg name="v10ls1" bitsize="64" type="uint64"/>
+ <reg name="v11ls1" bitsize="64" type="uint64"/>
+ <reg name="v12ls1" bitsize="64" type="uint64"/>
+ <reg name="v13ls1" bitsize="64" type="uint64"/>
+ <reg name="v14ls1" bitsize="64" type="uint64"/>
+ <reg name="v15ls1" bitsize="64" type="uint64"/>
+
+ <reg name="v16s1" bitsize="128" type="uint128"/>
+ <reg name="v17s1" bitsize="128" type="uint128"/>
+ <reg name="v18s1" bitsize="128" type="uint128"/>
+ <reg name="v19s1" bitsize="128" type="uint128"/>
+ <reg name="v20s1" bitsize="128" type="uint128"/>
+ <reg name="v21s1" bitsize="128" type="uint128"/>
+ <reg name="v22s1" bitsize="128" type="uint128"/>
+ <reg name="v23s1" bitsize="128" type="uint128"/>
+ <reg name="v24s1" bitsize="128" type="uint128"/>
+ <reg name="v25s1" bitsize="128" type="uint128"/>
+ <reg name="v26s1" bitsize="128" type="uint128"/>
+ <reg name="v27s1" bitsize="128" type="uint128"/>
+ <reg name="v28s1" bitsize="128" type="uint128"/>
+ <reg name="v29s1" bitsize="128" type="uint128"/>
+ <reg name="v30s1" bitsize="128" type="uint128"/>
+ <reg name="v31s1" bitsize="128" type="uint128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
new file mode 100644
index 0000000..eccbd8d
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx-valgrind-s2">
+ <reg name="v0ls2" bitsize="64" type="uint64"/>
+ <reg name="v1ls2" bitsize="64" type="uint64"/>
+ <reg name="v2ls2" bitsize="64" type="uint64"/>
+ <reg name="v3ls2" bitsize="64" type="uint64"/>
+ <reg name="v4ls2" bitsize="64" type="uint64"/>
+ <reg name="v5ls2" bitsize="64" type="uint64"/>
+ <reg name="v6ls2" bitsize="64" type="uint64"/>
+ <reg name="v7ls2" bitsize="64" type="uint64"/>
+ <reg name="v8ls2" bitsize="64" type="uint64"/>
+ <reg name="v9ls2" bitsize="64" type="uint64"/>
+ <reg name="v10ls2" bitsize="64" type="uint64"/>
+ <reg name="v11ls2" bitsize="64" type="uint64"/>
+ <reg name="v12ls2" bitsize="64" type="uint64"/>
+ <reg name="v13ls2" bitsize="64" type="uint64"/>
+ <reg name="v14ls2" bitsize="64" type="uint64"/>
+ <reg name="v15ls2" bitsize="64" type="uint64"/>
+
+ <reg name="v16s2" bitsize="128" type="uint128"/>
+ <reg name="v17s2" bitsize="128" type="uint128"/>
+ <reg name="v18s2" bitsize="128" type="uint128"/>
+ <reg name="v19s2" bitsize="128" type="uint128"/>
+ <reg name="v20s2" bitsize="128" type="uint128"/>
+ <reg name="v21s2" bitsize="128" type="uint128"/>
+ <reg name="v22s2" bitsize="128" type="uint128"/>
+ <reg name="v23s2" bitsize="128" type="uint128"/>
+ <reg name="v24s2" bitsize="128" type="uint128"/>
+ <reg name="v25s2" bitsize="128" type="uint128"/>
+ <reg name="v26s2" bitsize="128" type="uint128"/>
+ <reg name="v27s2" bitsize="128" type="uint128"/>
+ <reg name="v28s2" bitsize="128" type="uint128"/>
+ <reg name="v29s2" bitsize="128" type="uint128"/>
+ <reg name="v30s2" bitsize="128" type="uint128"/>
+ <reg name="v31s2" bitsize="128" type="uint128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390-vx.xml b/coregrind/m_gdbserver/s390-vx.xml
new file mode 100644
index 0000000..2a16873
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx">
+ <vector id="v4f" type="ieee_single" count="4"/>
+ <vector id="v2d" type="ieee_double" count="2"/>
+ <vector id="v16i8" type="int8" count="16"/>
+ <vector id="v8i16" type="int16" count="8"/>
+ <vector id="v4i32" type="int32" count="4"/>
+ <vector id="v2i64" type="int64" count="2"/>
+ <union id="vec128">
+ <field name="v4_float" type="v4f"/>
+ <field name="v2_double" type="v2d"/>
+ <field name="v16_int8" type="v16i8"/>
+ <field name="v8_int16" type="v8i16"/>
+ <field name="v4_int32" type="v4i32"/>
+ <field name="v2_int64" type="v2i64"/>
+ <field name="uint128" type="uint128"/>
+ </union>
+
+ <reg name="v0l" bitsize="64" type="uint64"/>
+ <reg name="v1l" bitsize="64" type="uint64"/>
+ <reg name="v2l" bitsize="64" type="uint64"/>
+ <reg name="v3l" bitsize="64" type="uint64"/>
+ <reg name="v4l" bitsize="64" type="uint64"/>
+ <reg name="v5l" bitsize="64" type="uint64"/>
+ <reg name="v6l" bitsize="64" type="uint64"/>
+ <reg name="v7l" bitsize="64" type="uint64"/>
+ <reg name="v8l" bitsize="64" type="uint64"/>
+ <reg name="v9l" bitsize="64" type="uint64"/>
+ <reg name="v10l" bitsize="64" type="uint64"/>
+ <reg name="v11l" bitsize="64" type="uint64"/>
+ <reg name="v12l" bitsize="64" type="uint64"/>
+ <reg name="v13l" bitsize="64" type="uint64"/>
+ <reg name="v14l" bitsize="64" type="uint64"/>
+ <reg name="v15l" bitsize="64" type="uint64"/>
+
+ <reg name="v16" bitsize="128" type="vec128"/>
+ <reg name="v17" bitsize="128" type="vec128"/>
+ <reg name="v18" bitsize="128" type="vec128"/>
+ <reg name="v19" bitsize="128" type="vec128"/>
+ <reg name="v20" bitsize="128" type="vec128"/>
+ <reg name="v21" bitsize="128" type="vec128"/>
+ <reg name="v22" bitsize="128" type="vec128"/>
+ <reg name="v23" bitsize="128" type="vec128"/>
+ <reg name="v24" bitsize="128" type="vec128"/>
+ <reg name="v25" bitsize="128" type="vec128"/>
+ <reg name="v26" bitsize="128" type="vec128"/>
+ <reg name="v27" bitsize="128" type="vec128"/>
+ <reg name="v28" bitsize="128" type="vec128"/>
+ <reg name="v29" bitsize="128" type="vec128"/>
+ <reg name="v30" bitsize="128" type="vec128"/>
+ <reg name="v31" bitsize="128" type="vec128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
new file mode 100644
index 0000000..0237002
--- /dev/null
+++ b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- S/390 64-bit user-level code. -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>s390:64-bit</architecture>
+ <xi:include href="s390x-core64.xml"/>
+ <xi:include href="s390-acr.xml"/>
+ <xi:include href="s390-fpr.xml"/>
+ <xi:include href="s390x-linux64.xml"/>
+ <xi:include href="s390-vx.xml"/>
+ <xi:include href="s390x-core64-valgrind-s1.xml"/>
+ <xi:include href="s390-acr-valgrind-s1.xml"/>
+ <xi:include href="s390-fpr-valgrind-s1.xml"/>
+ <xi:include href="s390x-linux64-valgrind-s1.xml"/>
+ <xi:include href="s390-vx-valgrind-s1.xml"/>
+ <xi:include href="s390x-core64-valgrind-s2.xml"/>
+ <xi:include href="s390-acr-valgrind-s2.xml"/>
+ <xi:include href="s390-fpr-valgrind-s2.xml"/>
+ <xi:include href="s390x-linux64-valgrind-s2.xml"/>
+ <xi:include href="s390-vx-valgrind-s2.xml"/>
+</target>
diff --git a/coregrind/m_gdbserver/s390x-vx-linux.xml b/coregrind/m_gdbserver/s390x-vx-linux.xml
new file mode 100644
index 0000000..e431c5b
--- /dev/null
+++ b/coregrind/m_gdbserver/s390x-vx-linux.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- S/390 64-bit user-level code. -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>s390:64-bit</architecture>
+ <xi:include href="s390x-core64.xml"/>
+ <xi:include href="s390-acr.xml"/>
+ <xi:include href="s390-fpr.xml"/>
+ <xi:include href="s390x-linux64.xml"/>
+ <xi:include href="s390-vx.xml"/>
+</target>
diff --git a/coregrind/m_gdbserver/valgrind-low-s390x.c b/coregrind/m_gdbserver/valgrind-low-s390x.c
index 7bbb2e3..a667f4b 100644
--- a/coregrind/m_gdbserver/valgrind-low-s390x.c
+++ b/coregrind/m_gdbserver/valgrind-low-s390x.c
@@ -88,9 +88,42 @@ static struct reg regs[] = {
{ "f14", 2592, 64 },
{ "f15", 2656, 64 },
{ "orig_r2", 2720, 64 },
+ { "v0l", 2784, 64 },
+ { "v1l", 2848, 64 },
+ { "v2l", 2912, 64 },
+ { "v3l", 2976, 64 },
+ { "v4l", 3040, 64 },
+ { "v5l", 3104, 64 },
+ { "v6l", 3168, 64 },
+ { "v7l", 3232, 64 },
+ { "v8l", 3296, 64 },
+ { "v9l", 3360, 64 },
+ { "v10l", 3424, 64 },
+ { "v11l", 3488, 64 },
+ { "v12l", 3552, 64 },
+ { "v13l", 3616, 64 },
+ { "v14l", 3680, 64 },
+ { "v15l", 3744, 64 },
+ { "v16", 3808, 128 },
+ { "v17", 3936, 128 },
+ { "v18", 4064, 128 },
+ { "v19", 4192, 128 },
+ { "v20", 4320, 128 },
+ { "v21", 4448, 128 },
+ { "v22", 4576, 128 },
+ { "v23", 4704, 128 },
+ { "v24", 4832, 128 },
+ { "v25", 4960, 128 },
+ { "v26", 5088, 128 },
+ { "v27", 5216, 128 },
+ { "v28", 5344, 128 },
+ { "v29", 5472, 128 },
+ { "v30", 5600, 128 },
+ { "v31", 5728, 128 },
};
static const char *expedite_regs[] = { "r14", "r15", "pswa", 0 };
-#define num_regs (sizeof (regs) / sizeof (regs[0]))
+#define num_regs_all (sizeof (regs) / sizeof (regs[0]))
+static int num_regs;
static
CORE_ADDR get_pc (void)
@@ -165,7 +198,7 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
case 32: VG_(transfer) (&s390x->guest_a14, buf, dir, size, mod); break;
case 33: VG_(transfer) (&s390x->guest_a15, buf, dir, size, mod); break;
case 34: VG_(transfer) (&s390x->guest_fpc, buf, dir, size, mod); break;
- case 35: VG_(transfer) (&s390x->guest_v0, buf, dir, size, mod); break;
+ case 35: VG_(transfer) (&s390x->guest_v0.w64[0], buf, dir, size, mod); break;
case 36: VG_(transfer) (&s390x->guest_v1.w64[0], buf, dir, size, mod); break;
case 37: VG_(transfer) (&s390x->guest_v2.w64[0], buf, dir, size, mod); break;
case 38: VG_(transfer) (&s390x->guest_v3.w64[0], buf, dir, size, mod); break;
@@ -182,18 +215,65 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
case 49: VG_(transfer) (&s390x->guest_v14.w64[0], buf, dir, size, mod); break;
case 50: VG_(transfer) (&s390x->guest_v15.w64[0], buf, dir, size, mod); break;
case 51: *mod = False; break; //GDBTD??? { "orig_r2", 0, 64 },
+ case 52: VG_(transfer) (&s390x->guest_v0.w64[1], buf, dir, size, mod); break;
+ case 53: VG_(transfer) (&s390x->guest_v1.w64[1], buf, dir, size, mod); break;
+ case 54: VG_(transfer) (&s390x->guest_v2.w64[1], buf, dir, size, mod); break;
+ case 55: VG_(transfer) (&s390x->guest_v3.w64[1], buf, dir, size, mod); break;
+ case 56: VG_(transfer) (&s390x->guest_v4.w64[1], buf, dir, size, mod); break;
+ case 57: VG_(transfer) (&s390x->guest_v5.w64[1], buf, dir, size, mod); break;
+ case 58: VG_(transfer) (&s390x->guest_v6.w64[1], buf, dir, size, mod); break;
+ case 59: VG_(transfer) (&s390x->guest_v7.w64[1], buf, dir, size, mod); break;
+ case 60: VG_(transfer) (&s390x->guest_v8.w64[1], buf, dir, size, mod); break;
+ case 61: VG_(transfer) (&s390x->guest_v9.w64[1], buf, dir, size, mod); break;
+ case 62: VG_(transfer) (&s390x->guest_v10.w64[1], buf, dir, size, mod); break;
+ case 63: VG_(transfer) (&s390x->guest_v11.w64[1], buf, dir, size, mod); break;
+ case 64: VG_(transfer) (&s390x->guest_v12.w64[1], buf, dir, size, mod); break;
+ case 65: VG_(transfer) (&s390x->guest_v13.w64[1], buf, dir, size, mod); break;
+ case 66: VG_(transfer) (&s390x->guest_v14.w64[1], buf, dir, size, mod); break;
+ case 67: VG_(transfer) (&s390x->guest_v15.w64[1], buf, dir, size, mod); break;
+ case 68: VG_(transfer) (&s390x->guest_v16, buf, dir, size, mod); break;
+ case 69: VG_(transfer) (&s390x->guest_v17, buf, dir, size, mod); break;
+ case 70: VG_(transfer) (&s390x->guest_v18, buf, dir, size, mod); break;
+ case 71: VG_(transfer) (&s390x->guest_v19, buf, dir, size, mod); break;
+ case 72: VG_(transfer) (&s390x->guest_v20, buf, dir, size, mod); break;
+ case 73: VG_(transfer) (&s390x->guest_v21, buf, dir, size, mod); break;
+ case 74: VG_(transfer) (&s390x->guest_v22, buf, dir, size, mod); break;
+ case 75: VG_(transfer) (&s390x->guest_v23, buf, dir, size, mod); break;
+ case 76: VG_(transfer) (&s390x->guest_v24, buf, dir, size, mod); break;
+ case 77: VG_(transfer) (&s390x->guest_v25, buf, dir, size, mod); break;
+ case 78: VG_(transfer) (&s390x->guest_v26, buf, dir, size, mod); break;
+ case 79: VG_(transfer) (&s390x->guest_v27, buf, dir, size, mod); break;
+ case 80: VG_(transfer) (&s390x->guest_v28, buf, dir, size, mod); break;
+ case 81: VG_(transfer) (&s390x->guest_v29, buf, dir, size, mod); break;
+ case 82: VG_(transfer) (&s390x->guest_v30, buf, dir, size, mod); break;
+ case 83: VG_(transfer) (&s390x->guest_v31, buf, dir, size, mod); break;
default: vg_assert(0);
}
}
static
+Bool have_vx (void)
+{
+ VexArch va;
+ VexArchInfo vai;
+ VG_(machine_get_VexArchInfo) (&va, &vai);
+ return (vai.hwcaps & VEX_HWCAPS_S390X_VX) != 0;
+}
+
+static
const char* target_xml (Bool shadow_mode)
{
if (shadow_mode) {
- return "s390x-generic-valgrind.xml";
+ if (have_vx())
+ return "s390x-vx-linux-valgrind.xml";
+ else
+ return "s390x-generic-valgrind.xml";
} else {
- return "s390x-generic.xml";
- }
+ if (have_vx())
+ return "s390x-vx-linux.xml";
+ else
+ return "s390x-generic.xml";
+ }
}
static CORE_ADDR** target_get_dtv (ThreadState *tst)
@@ -206,7 +286,7 @@ static CORE_ADDR** target_get_dtv (ThreadState *tst)
}
static struct valgrind_target_ops low_target = {
- num_regs,
+ -1, // Override at init time.
regs,
17, //sp = r15, which is register offset 17 in regs
transfer_register,
@@ -220,6 +300,11 @@ static struct valgrind_target_ops low_target = {
void s390x_init_architecture (struct valgrind_target_ops *target)
{
*target = low_target;
+ if (have_vx())
+ num_regs = num_regs_all;
+ else
+ num_regs = num_regs_all - 32; // Remove all VX registers.
+ target->num_regs = num_regs;
set_register_cache (regs, num_regs);
gdbserver_expedite_regs = expedite_regs;
}
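
have_vx() above keys off VEX's hwcaps. For comparison, here is a userspace sketch of the same check done via the auxiliary vector; the HWCAP_S390_VX name and bit position are an assumption taken from the kernel's asm/elf.h, not from this patch:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX (1UL << 11)   /* assumed: vector-facility bit, per asm/elf.h */
#endif

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);
    printf("s390 vector facility: %s\n",
           (hwcap & HWCAP_S390_VX) ? "present" : "absent");
    return 0;
}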
diff -ru valgrind-3.14.0.orig/coregrind/Makefile.in valgrind-3.14.0/coregrind/Makefile.in
--- valgrind-3.14.0.orig/coregrind/Makefile.in 2018-11-20 17:30:03.075888111 +0100
+++ valgrind-3.14.0/coregrind/Makefile.in 2018-11-20 17:31:14.999314275 +0100
@@ -1869,6 +1869,11 @@
m_gdbserver/s390x-linux64-valgrind-s1.xml \
m_gdbserver/s390x-linux64-valgrind-s2.xml \
m_gdbserver/s390x-linux64.xml \
+ m_gdbserver/s390-vx-valgrind-s1.xml \
+ m_gdbserver/s390-vx-valgrind-s2.xml \
+ m_gdbserver/s390-vx.xml \
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
+ m_gdbserver/s390x-vx-linux.xml \
m_gdbserver/mips-cp0-valgrind-s1.xml \
m_gdbserver/mips-cp0-valgrind-s2.xml \
m_gdbserver/mips-cp0.xml \

View File

@@ -0,0 +1,51 @@
From d10cd86ee32bf76495f79c02df62fc242adbcbe3 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2018 16:35:24 +0200
Subject: [PATCH] s390x: More fixes for z13 support
This patch addresses the following:
* Fix the implementation of LOCGHI. Previously Valgrind performed 32-bit
sign extension instead of 64-bit sign extension on the immediate value.
* Advertise VXRS in HWCAP. If no VXRS are advertised, but the program
uses vector registers, this could cause problems with a glibc built with
"-march=z13".
---
VEX/priv/guest_s390_toIR.c | 2 +-
coregrind/m_initimg/initimg-linux.c | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index 9c4d79b87..50a5a4177 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -16325,7 +16325,7 @@ static const HChar *
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_dw0(r1, mkU64((UInt)(Int)(Short)i2));
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
return "locghi";
}
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index 61cc458bc..8a7f0d024 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -699,9 +699,9 @@ Addr setup_client_stack( void* init_sp,
}
# elif defined(VGP_s390x_linux)
{
- /* Advertise hardware features "below" TE only. TE and VXRS
- (and anything above) are not supported by Valgrind. */
- auxv->u.a_val &= VKI_HWCAP_S390_TE - 1;
+ /* Advertise hardware features "below" TE and VXRS. TE itself
+ and anything above VXRS is not supported by Valgrind. */
+ auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS;
}
# elif defined(VGP_arm64_linux)
{
--
2.17.0

View File

@@ -0,0 +1,145 @@
commit dc1523fb3550b4ed9dd4c178741626daaa474da7
Author: Mark Wielaard <mark@klomp.org>
Date: Mon Dec 10 17:18:20 2018 +0100
PR386945 set_AV_CR6 patch
https://bugs.kde.org/show_bug.cgi?id=386945#c62
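
As context for the diff below, here is a scalar C sketch (illustrative only, not VEX IR) of the 64-bit-lane computation the rewritten set_AV_CR6 performs on the two halves of a 128-bit result:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t v0 = 0x0, v1 = 0x0;          /* the two 64-bit lanes */
    uint64_t ored  = v0 | v1;             /* overlappedOred  */
    uint64_t anded = v0 & v1;             /* overlappedAnded */

    int all_zeroes = (ored == 0);         /* rZeroes */
    int all_ones   = (~anded == 0);       /* rOnes   */

    /* CR6[0:3] = {all_ones, 0, all_zeroes, 0} */
    int cr6 = (all_ones << 3) | (all_zeroes << 1);
    printf("CR6 = %d\n", cr6);            /* 2 here: all zeroes, not all ones */
    return 0;
}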
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index ec2f90a..c3cc6d0 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -2062,45 +2062,88 @@ static void set_CR0 ( IRExpr* result )
static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
{
/* CR6[0:3] = {all_ones, 0, all_zeros, 0}
- all_ones = (v[0] && v[1] && v[2] && v[3])
- all_zeros = ~(v[0] || v[1] || v[2] || v[3])
+ 32 bit: all_zeros = (v[0] || v[1] || v[2] || v[3]) == 0x0000'0000
+ all_ones = ~(v[0] && v[1] && v[2] && v[3]) == 0x0000'0000
+ where v[] denotes 32-bit lanes
+ or
+ 64 bit: all_zeros = (v[0] || v[1]) == 0x0000'0000'0000'0000
+ all_ones = ~(v[0] && v[1]) == 0x0000'0000'0000'0000
+ where v[] denotes 64-bit lanes
+
+ The 32- and 64-bit versions compute the same thing, but the 64-bit one
+ tries to be a bit more efficient.
*/
- IRTemp v0 = newTemp(Ity_V128);
- IRTemp v1 = newTemp(Ity_V128);
- IRTemp v2 = newTemp(Ity_V128);
- IRTemp v3 = newTemp(Ity_V128);
- IRTemp rOnes = newTemp(Ity_I8);
- IRTemp rZeros = newTemp(Ity_I8);
-
vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
- assign( v0, result );
- assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
- assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
- assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
+ IRTemp overlappedOred = newTemp(Ity_V128);
+ IRTemp overlappedAnded = newTemp(Ity_V128);
+
+ if (mode64) {
+ IRTemp v0 = newTemp(Ity_V128);
+ IRTemp v1 = newTemp(Ity_V128);
+ assign( v0, result );
+ assign( v1, binop(Iop_ShrV128, result, mkU8(64)) );
+ assign(overlappedOred,
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)));
+ assign(overlappedAnded,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)));
+ } else {
+ IRTemp v0 = newTemp(Ity_V128);
+ IRTemp v1 = newTemp(Ity_V128);
+ IRTemp v2 = newTemp(Ity_V128);
+ IRTemp v3 = newTemp(Ity_V128);
+ assign( v0, result );
+ assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
+ assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
+ assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
+ assign(overlappedOred,
+ binop(Iop_OrV128,
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))));
+ assign(overlappedAnded,
+ binop(Iop_AndV128,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))));
+ }
+
+ IRTemp rOnes = newTemp(Ity_I8);
+ IRTemp rZeroes = newTemp(Ity_I8);
- assign( rZeros, unop(Iop_1Uto8,
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
- unop(Iop_Not32,
- unop(Iop_V128to32,
- binop(Iop_OrV128,
- binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
- binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
- ))) );
+ if (mode64) {
+ assign(rZeroes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ64,
+ mkU64(0),
+ unop(Iop_V128to64, mkexpr(overlappedOred)))));
+ assign(rOnes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ64,
+ mkU64(0),
+ unop(Iop_Not64,
+ unop(Iop_V128to64, mkexpr(overlappedAnded))))));
+ } else {
+ assign(rZeroes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32,
+ mkU32(0),
+ unop(Iop_V128to32, mkexpr(overlappedOred)))));
+ assign(rOnes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32,
+ mkU32(0),
+ unop(Iop_Not32,
+ unop(Iop_V128to32, mkexpr(overlappedAnded))))));
+ }
+
+ // rOnes might not be used below. But iropt will remove it, so there's no
+ // inefficiency as a result.
if (test_all_ones) {
- assign( rOnes, unop(Iop_1Uto8,
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
- unop(Iop_V128to32,
- binop(Iop_AndV128,
- binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
- binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
- ))) );
putCR321( 6, binop(Iop_Or8,
binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
- binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+ binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1))) );
} else {
- putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1)) );
}
putCR0( 6, mkU8(0) );
}
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index c24db91..7f69ee3 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -8322,6 +8322,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure,
# elif defined(VGA_amd64)
mce.dlbo.dl_Add64 = DLauto;
mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
+# elif defined(VGA_ppc64le)
+ // Needed by (at least) set_AV_CR6() in the front end.
+ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
# endif
/* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then

View File

@@ -0,0 +1,244 @@
commit 0c701ba2a4b10a5f6f3fae31cb0ec6ca034d51d9
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 7 14:01:20 2018 +0100
Fix sigkill.stderr.exp for glibc-2.28.
glibc 2.28 filters out some bad signal numbers and returns
Invalid argument instead of passing such bad signal numbers
the kernel sigaction syscall. So we won't see such bad signal
numbers and won't print "bad signal number" ourselves.
Add a new memcheck/tests/sigkill.stderr.exp-glibc-2.28 to catch
this case.
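
A minimal sketch (assumed for illustration, not the actual testcase) of the behaviour the new expected-output file captures: with glibc 2.28, sigaction() itself rejects an out-of-range signal number with EINVAL, so neither the kernel nor Valgrind ever sees it:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof sa);
    sa.sa_handler = SIG_IGN;

    /* 65 is above SIGRTMAX (64 on Linux); glibc 2.28 filters it out. */
    if (sigaction(65, &sa, NULL) != 0)
        printf("setting signal 65: %s\n", strerror(errno));
    return 0;
}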
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 76e0e90..2af4dd1 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -260,7 +260,8 @@ EXTRA_DIST = \
sh-mem-random.stdout.exp sh-mem-random.vgtest \
sigaltstack.stderr.exp sigaltstack.vgtest \
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
- sigkill.stderr.exp-solaris sigkill.vgtest \
+ sigkill.stderr.exp-solaris \
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
static_malloc.stderr.exp static_malloc.vgtest \
diff --git a/memcheck/tests/sigkill.stderr.exp-glibc-2.28 b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
new file mode 100644
index 0000000..0e5f0cb
--- /dev/null
+++ b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
@@ -0,0 +1,197 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: ignored attempt to set SIGKILL handler in sigaction();
+ the SIGKILL signal is uncatchable
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: ignored attempt to set SIGSTOP handler in sigaction();
+ the SIGSTOP signal is uncatchable
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 65: Invalid argument
+getting signal 65: Invalid argument
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-13 00:30:45.013839247 +0100
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-13 00:30:54.242636002 +0100
@@ -1573,7 +1573,8 @@
sh-mem-random.stdout.exp sh-mem-random.vgtest \
sigaltstack.stderr.exp sigaltstack.vgtest \
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
- sigkill.stderr.exp-solaris sigkill.vgtest \
+ sigkill.stderr.exp-solaris \
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
static_malloc.stderr.exp static_malloc.vgtest \

View File

@@ -0,0 +1,82 @@
commit cb5d7e047598bff6d0f1d707a70d9fb1a1c7f0e2
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:46:55 2018 +0100
VEX/priv/ir_opt.c
fold_Expr: transform PopCount64(And64(Add64(x,-1),Not64(x))) into CtzNat64(x).
This is part of the fix for bug 386945.
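
The identity behind this folding rule can be checked with a few lines of C (a sketch, not VEX code): (x-1) & ~x has ones exactly in the trailing-zero positions of x, so its population count equals the count of trailing zeros:

#include <stdio.h>
#include <stdint.h>

static int popcount64(uint64_t v)
{
    int n = 0;
    while (v) { v &= v - 1; n++; }  /* clear lowest set bit */
    return n;
}

int main(void)
{
    uint64_t x = 0x50;                 /* ...1010000, 4 trailing zeros */
    uint64_t mask = (x - 1) & ~x;      /* ones exactly below the lowest set bit */
    /* Note: __builtin_ctzll (GCC/Clang) is undefined at x == 0, whereas
       CtzNat64 is defined there (both sides of the identity give 64). */
    printf("popcount=%d ctz=%d\n", popcount64(mask), __builtin_ctzll(x));
    return 0;
}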
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index f40870b..23964be 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -1377,6 +1377,8 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
case Iex_Unop:
/* UNARY ops */
if (e->Iex.Unop.arg->tag == Iex_Const) {
+
+ /* cases where the arg is a const */
switch (e->Iex.Unop.op) {
case Iop_1Uto8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
@@ -1690,8 +1692,56 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
default:
goto unhandled;
- }
- }
+ } // switch (e->Iex.Unop.op)
+
+ } else {
+
+ /* other cases (identities, etc) */
+ switch (e->Iex.Unop.op) {
+ case Iop_PopCount64: {
+ // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
+ // bindings:
+ // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
+ IRExpr* a1 = chase(env, e->Iex.Unop.arg);
+ if (!a1)
+ goto nomatch;
+ if (a1->tag != Iex_Binop || a1->Iex.Binop.op != Iop_And64)
+ goto nomatch;
+ // a1 is established
+ IRExpr* a11 = chase(env, a1->Iex.Binop.arg1);
+ if (!a11)
+ goto nomatch;
+ if (a11->tag != Iex_Binop || a11->Iex.Binop.op != Iop_Add64)
+ goto nomatch;
+ // a11 is established
+ IRExpr* a12 = chase(env, a1->Iex.Binop.arg2);
+ if (!a12)
+ goto nomatch;
+ if (a12->tag != Iex_Unop || a12->Iex.Unop.op != Iop_Not64)
+ goto nomatch;
+ // a12 is established
+ IRExpr* a111 = a11->Iex.Binop.arg1;
+ IRExpr* a112 = chase(env, a11->Iex.Binop.arg2);
+ IRExpr* a121 = a12->Iex.Unop.arg;
+ if (!a111 || !a112 || !a121)
+ goto nomatch;
+ // a111 and a121 need to be the same temp.
+ if (!eqIRAtom(a111, a121))
+ goto nomatch;
+ // Finally, a112 must be a 64-bit version of -1.
+ if (!isOnesU(a112))
+ goto nomatch;
+ // Match established. Transform.
+ e2 = IRExpr_Unop(Iop_CtzNat64, a111);
+ break;
+ nomatch:
+ break;
+ }
+ default:
+ break;
+ } // switch (e->Iex.Unop.op)
+
+ } // if (e->Iex.Unop.arg->tag == Iex_Const)
break;
case Iex_Binop:

View File

@@ -0,0 +1,98 @@
commit 262275da43425ba2b8c240e47063e36b39167996
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 13:55:01 2018 +0100
Fix memcheck/tests/undef_malloc_args testcase.
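
A hypothetical demonstration (not part of the patch) of why XORing the two arguments works as an "artificial use": if either pointer is undefined, the bits of the XOR are undefined too, so the branch condition itself becomes undefined and Memcheck reports it:

#include <stdlib.h>
#include <valgrind/memcheck.h>

int main(void)
{
    void *zone = NULL;
    void *p = malloc(16);

    VALGRIND_MAKE_MEM_UNDEFINED(&zone, sizeof zone);

    /* Branch on the XOR of both args: undefinedness in either one
       propagates into the tested value. Run under memcheck to see a
       "conditional jump depends on uninitialised value(s)" report. */
    if (((unsigned long)zone ^ (unsigned long)p) == 0)
        __asm__ __volatile__("" ::: "memory");

    free(p);
    return 0;
}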
diff --git a/coregrind/m_replacemalloc/vg_replace_malloc.c b/coregrind/m_replacemalloc/vg_replace_malloc.c
index 28bdb4a..564829a 100644
--- a/coregrind/m_replacemalloc/vg_replace_malloc.c
+++ b/coregrind/m_replacemalloc/vg_replace_malloc.c
@@ -216,9 +216,19 @@ static void init(void);
Apart from allowing memcheck to detect an error, the macro
TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED has no effect and
has a minimal cost for other tools replacing malloc functions.
+
+ Creating an "artificial" use of _x that works reliably is not entirely
+ straightforward. Simply comparing it against zero often produces no
+ warning if at least one nonzero bit of _x is defined, because
+ Memcheck knows that the result of the comparison will be defined (cf
+ expensiveCmpEQorNE).
+
+ Really we want to PCast _x, so as to create a value which is entirely
+ undefined if any bit of _x is undefined. But there's no portable way to do
+ that.
*/
-#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(x) \
- if ((ULong)x == 0) __asm__ __volatile__( "" ::: "memory" )
+#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(_x) \
+ if ((UWord)(_x) == 0) __asm__ __volatile__( "" ::: "memory" )
/*---------------------- malloc ----------------------*/
@@ -504,7 +514,7 @@ static void init(void);
void VG_REPLACE_FUNCTION_EZU(10040,soname,fnname) (void *zone, void *p) \
{ \
DO_INIT; \
- TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord) zone); \
+ TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord)zone ^ (UWord)p); \
MALLOC_TRACE(#fnname "(%p, %p)\n", zone, p ); \
if (p == NULL) \
return; \
diff --git a/memcheck/tests/undef_malloc_args.c b/memcheck/tests/undef_malloc_args.c
index 99e2799..654d70d 100644
--- a/memcheck/tests/undef_malloc_args.c
+++ b/memcheck/tests/undef_malloc_args.c
@@ -11,29 +11,29 @@ int main (int argc, char*argv[])
{
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
p = malloc(size);
}
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, sizeof(p));
new_p = realloc(p, def_size);
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
new_p = realloc(new_p, def_size);
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
free (new_p);
{
size_t nmemb = 1;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, sizeof(nmemb));
new_p = calloc(nmemb, def_size);
free (new_p);
}
#if 0
{
size_t alignment = 1;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, sizeof(alignment));
new_p = memalign(alignment, def_size);
free(new_p);
}
@@ -41,14 +41,14 @@ int main (int argc, char*argv[])
{
size_t nmemb = 16;
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
new_p = memalign(nmemb, size);
free(new_p);
}
{
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
new_p = valloc(size);
free (new_p);
}

View File

@@ -0,0 +1,89 @@
commit 5fdabb72fdcba6bcf788eaa19c1ee557c13b8a7a
Author: Mark Wielaard <mark@klomp.org>
Date: Sat Dec 1 23:54:40 2018 +0100
Bug 401627 - Add wcsncmp override and testcase.
glibc 2.28 added an AVX2-optimized variant of wcsncmp which memcheck
cannot prove correct. Add a simple override in vg_replace_strmem.c.
diff --git a/memcheck/tests/wcs.c b/memcheck/tests/wcs.c
index 15730ad..538304b 100644
--- a/memcheck/tests/wcs.c
+++ b/memcheck/tests/wcs.c
@@ -1,5 +1,6 @@
-// Uses various wchar_t * functions that have hand written SSE assembly
-// implementations in glibc. wcslen, wcscpy, wcscmp, wcsrchr, wcschr.
+// Uses various wchar_t * functions that have hand written SSE and/or AVX2
+// assembly implementations in glibc.
+// wcslen, wcscpy, wcscmp, wcsncmp, wcsrchr, wcschr.
#include <stdio.h>
#include <stdlib.h>
@@ -18,6 +19,8 @@ int main(int argc, char **argv)
c = wcscpy (b, a);
fprintf (stderr, "wcscmp equal: %d\n", wcscmp (a, b)); // wcscmp equal: 0
+ fprintf (stderr,
+ "wcsncmp equal: %d\n", wcsncmp (a, b, l)); // wcsncmp equal: 0
d = wcsrchr (a, L'd');
e = wcschr (a, L'd');
diff --git a/memcheck/tests/wcs.stderr.exp b/memcheck/tests/wcs.stderr.exp
index 41d74c8..d5b5959 100644
--- a/memcheck/tests/wcs.stderr.exp
+++ b/memcheck/tests/wcs.stderr.exp
@@ -1,3 +1,4 @@
wcslen: 53
wcscmp equal: 0
+wcsncmp equal: 0
wcsrchr == wcschr: 1
diff --git a/shared/vg_replace_strmem.c b/shared/vg_replace_strmem.c
index d6927f0..89a7dcc 100644
--- a/shared/vg_replace_strmem.c
+++ b/shared/vg_replace_strmem.c
@@ -103,6 +103,7 @@
20420 STPNCPY
20430 WMEMCHR
20440 WCSNLEN
+ 20450 WSTRNCMP
*/
#if defined(VGO_solaris)
@@ -1927,6 +1928,36 @@ static inline void my_exit ( int x )
WCSCMP(VG_Z_LIBC_SONAME, wcscmp)
#endif
+/*---------------------- wcsncmp ----------------------*/
+
+// This is a wchar_t equivalent to strncmp. We don't
+// have wchar_t available here, but in the GNU C Library
+// wchar_t is always 32 bits wide and wcsncmp uses signed
+// comparison, not unsigned as in the strncmp function.
+
+#define WCSNCMP(soname, fnname) \
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
+ ( const Int* s1, const Int* s2, SizeT nmax ); \
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
+ ( const Int* s1, const Int* s2, SizeT nmax ) \
+ { \
+ SizeT n = 0; \
+ while (True) { \
+ if (n >= nmax) return 0; \
+ if (*s1 == 0 && *s2 == 0) return 0; \
+ if (*s1 == 0) return -1; \
+ if (*s2 == 0) return 1; \
+ \
+ if (*s1 < *s2) return -1; \
+ if (*s1 > *s2) return 1; \
+ \
+ s1++; s2++; n++; \
+ } \
+ }
+#if defined(VGO_linux)
+ WCSNCMP(VG_Z_LIBC_SONAME, wcsncmp)
+#endif
+
/*---------------------- wcscpy ----------------------*/
// This is a wchar_t equivalent to strcpy. We don't

View File

@@ -0,0 +1,54 @@
--- valgrind-3.8.1/cachegrind/cg_sim.c.jj 2011-10-26 23:24:32.000000000 +0200
+++ valgrind-3.8.1/cachegrind/cg_sim.c 2011-12-09 17:31:19.256023683 +0100
@@ -42,27 +42,30 @@ typedef struct {
Int size; /* bytes */
Int assoc;
Int line_size; /* bytes */
- Int sets;
Int sets_min_1;
Int line_size_bits;
Int tag_shift;
- HChar desc_line[128]; /* large enough */
UWord* tags;
-} cache_t2;
+ HChar desc_line[128];
+} cache_t2
+#ifdef __GNUC__
+__attribute__ ((aligned (8 * sizeof (Int))))
+#endif
+;
/* By this point, the size/assoc/line_size has been checked. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
- Int i;
+ Int sets;
c->size = config.size;
c->assoc = config.assoc;
c->line_size = config.line_size;
- c->sets = (c->size / c->line_size) / c->assoc;
- c->sets_min_1 = c->sets - 1;
+ sets = (c->size / c->line_size) / c->assoc;
+ c->sets_min_1 = sets - 1;
c->line_size_bits = VG_(log2)(c->line_size);
- c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
+ c->tag_shift = c->line_size_bits + VG_(log2)(sets);
if (c->assoc == 1) {
VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
@@ -72,11 +75,8 @@ static void cachesim_initcache(cache_t c
c->size, c->line_size, c->assoc);
}
- c->tags = VG_(malloc)("cg.sim.ci.1",
- sizeof(UWord) * c->sets * c->assoc);
-
- for (i = 0; i < c->sets * c->assoc; i++)
- c->tags[i] = 0;
+ c->tags = VG_(calloc)("cg.sim.ci.1",
+ sizeof(UWord), sets * c->assoc);
}
/* This attribute forces GCC to inline the function, getting rid of a
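
The restructuring above moves the cold desc_line to the end of cache_t2 and aligns the struct so the hot lookup fields stay within fewer cache lines. A minimal sketch of the same idea (field names borrowed from the patch, otherwise illustrative):

#include <stdio.h>

struct cache_hot {
    int sets_min_1;
    int line_size_bits;
    int tag_shift;
    unsigned long *tags;
    char desc_line[128];           /* cold data, moved to the end */
}
#ifdef __GNUC__
__attribute__ ((aligned (32)))     /* 8 * sizeof(int), as in the patch */
#endif
;

int main(void)
{
    printf("alignment = %zu\n", _Alignof(struct cache_hot));  /* >= 32 */
    return 0;
}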

View File

@@ -0,0 +1,15 @@
--- valgrind/glibc-2.34567-NPTL-helgrind.supp.jj 2009-08-19 15:37:48.000000000 +0200
+++ valgrind/glibc-2.34567-NPTL-helgrind.supp 2009-10-21 16:46:31.000000000 +0200
@@ -88,6 +88,12 @@
obj:*/lib*/libpthread-2.*so*
}
{
+ helgrind-glibc2X-102a
+ Helgrind:Race
+ fun:mythread_wrapper
+ obj:*vgpreload_helgrind*.so
+}
+{
helgrind-glibc2X-103
Helgrind:Race
fun:pthread_cond_*@@GLIBC_2.*

View File

@@ -0,0 +1,28 @@
--- valgrind/glibc-2.X.supp.in.jj 2011-10-26 23:24:45.000000000 +0200
+++ valgrind/glibc-2.X.supp.in 2012-05-07 10:55:20.395942656 +0200
@@ -124,7 +124,7 @@
glibc-2.5.x-on-SUSE-10.2-(PPC)-2a
Memcheck:Cond
fun:index
- obj:*ld-@GLIBC_VERSION@.*.so
+ obj:*ld-@GLIBC_VERSION@*.so
}
{
glibc-2.5.x-on-SuSE-10.2-(PPC)-2b
@@ -136,14 +136,14 @@
glibc-2.5.5-on-SuSE-10.2-(PPC)-2c
Memcheck:Addr4
fun:index
- obj:*ld-@GLIBC_VERSION@.*.so
+ obj:*ld-@GLIBC_VERSION@*.so
}
{
glibc-2.3.5-on-SuSE-10.1-(PPC)-3
Memcheck:Addr4
fun:*wordcopy_fwd_dest_aligned*
fun:mem*cpy
- obj:*lib*@GLIBC_VERSION@.*.so
+ obj:*lib*@GLIBC_VERSION@*.so
}
{

1844
SPECS/valgrind.spec Normal file

File diff suppressed because it is too large