import valgrind-3.14.0-10.el8_0

CentOS Sources, 2019-06-18 12:42:06 -04:00 (committed by Andrew Lukoshko)
commit 6ee00ced0a
35 changed files with 10126 additions and 0 deletions

.gitignore (vendored, new file)

@@ -0,0 +1 @@
SOURCES/valgrind-3.14.0.tar.bz2

.valgrind.metadata (new file)

@@ -0,0 +1 @@
182afd405b92ddb6f52c6729e848eacf4b1daf46 SOURCES/valgrind-3.14.0.tar.bz2


@@ -0,0 +1,24 @@
commit 43fe4bc236d667257eeebfb4f6bcbe2b92aea455
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 14 14:32:27 2018 +0100
arm64: Fix PTRACE_TRACEME memcheck/tests/linux/getregset.vgtest testcase.
The sys_ptrace post didn't mark the thread as being in traceme mode.
This occasionally would make the memcheck/tests/linux/getregset.vgtest
testcase fail. With this patch it reliably passes.
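To illustrate the behaviour being tracked (a minimal sketch, not the
actual testcase): a child announces itself with PTRACE_TRACEME, so the
sys_ptrace POST handler must record the thread as traced for later
requests such as PTRACE_GETREGSET to be handled correctly.

#include <signal.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
   pid_t pid = fork();
   if (pid == 0) {                       /* child */
      ptrace(PTRACE_TRACEME, 0, 0, 0);   /* POST(sys_ptrace) must mark
                                            this thread as traced */
      raise(SIGSTOP);                    /* stop so the parent can
                                            inspect our registers */
      _exit(0);
   }
   waitpid(pid, 0, 0);                   /* parent sees the child stop */
   ptrace(PTRACE_CONT, pid, 0, 0);       /* resume the child */
   waitpid(pid, 0, 0);                   /* reap it */
   return 0;
}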
diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c
index 9ef54b4..650f5b9 100644
--- a/coregrind/m_syswrap/syswrap-arm64-linux.c
+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
@@ -499,6 +499,9 @@ PRE(sys_ptrace)
POST(sys_ptrace)
{
switch (ARG1) {
+ case VKI_PTRACE_TRACEME:
+ ML_(linux_POST_traceme)(tid);
+ break;
case VKI_PTRACE_PEEKTEXT:
case VKI_PTRACE_PEEKDATA:
case VKI_PTRACE_PEEKUSR:


@@ -0,0 +1,18 @@
commit 27fe22378da38424102c5292b782cacdd9d7b9e4
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 12:09:03 2018 +0100
Add support for Iop_{Sar,Shr}8 on ppc. --expensive-definedness-checks=yes needs them.
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 5242176..750cf8d 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -1528,7 +1528,6 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
True/*32bit shift*/,
tmp, tmp, amt));
r_srcL = tmp;
- vassert(0); /* AWAITING TEST CASE */
}
}
/* Only 64 expressions need 64bit shifts,


@@ -0,0 +1,59 @@
commit be7a73004583aab5d4c97cf55276ca58d5b3090b
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 14:15:28 2018 +0100
Mark helper regs defined in final_tidyup before freeres_wrapper call.
In final_tidyup we setup the guest to call the freeres_wrapper, which
will (possibly) call __gnu_cxx::__freeres() and/or __libc_freeres().
In a couple of cases (ppc64be, ppc64le and mips32) this involves setting
up one or more helper registers. Since we setup these guest registers
we should make sure to mark them as fully defined. Otherwise we might
see spurious warnings about undefined value usage if the guest register
happened to not be fully defined before.
This fixes PR402006.
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 00702fc..22872a2 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -2304,22 +2304,35 @@ static void final_tidyup(ThreadId tid)
"Caught __NR_exit; running %s wrapper\n", msgs[to_run - 1]);
}
- /* set thread context to point to freeres_wrapper */
- /* ppc64be-linux note: freeres_wrapper gives us the real
+ /* Set thread context to point to freeres_wrapper.
+ ppc64be-linux note: freeres_wrapper gives us the real
function entry point, not a fn descriptor, so can use it
directly. However, we need to set R2 (the toc pointer)
appropriately. */
VG_(set_IP)(tid, freeres_wrapper);
+
# if defined(VGP_ppc64be_linux)
VG_(threads)[tid].arch.vex.guest_GPR2 = r2;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR2),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
# elif defined(VGP_ppc64le_linux)
/* setting GPR2 but not really needed, GPR12 is needed */
VG_(threads)[tid].arch.vex.guest_GPR2 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR2),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR2));
VG_(threads)[tid].arch.vex.guest_GPR12 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestPPC64State, guest_GPR12),
+ sizeof(VG_(threads)[tid].arch.vex.guest_GPR12));
# endif
/* mips-linux note: we need to set t9 */
# if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
VG_(threads)[tid].arch.vex.guest_r25 = freeres_wrapper;
+ VG_TRACK(post_reg_write, Vg_CoreClientReq, tid,
+ offsetof(VexGuestMIPS32State, guest_r25),
+ sizeof(VG_(threads)[tid].arch.vex.guest_r25));
# endif
/* Pass a parameter to freeres_wrapper(). */


@@ -0,0 +1,81 @@
commit 7f1dd9d5aec1f1fd4eb0ae3a311358a914f1d73f
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 10:18:29 2018 +0100
get_otrack_shadow_offset_wrk for ppc32 and ppc64: add missing cases for XER_OV32, XER_CA32 and C_FPCC.
The missing cases were discovered whilst testing fixes for bug 386945, but are
otherwise unrelated to that bug.
diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
index 5ed101f..4ce746e 100644
--- a/memcheck/mc_machine.c
+++ b/memcheck/mc_machine.c
@@ -120,11 +120,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
Int o = offset;
tl_assert(sz > 0);
-#if defined(VGA_ppc64be)
+# if defined(VGA_ppc64be)
tl_assert(host_is_big_endian());
-#elif defined(VGA_ppc64le)
+# elif defined(VGA_ppc64le)
tl_assert(host_is_little_endian());
-#endif
+# endif
if (sz == 8 || sz == 4) {
/* The point of this is to achieve
@@ -132,11 +132,11 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
return GOF(GPRn);
by testing ox instead of o, and setting ox back 4 bytes when sz == 4.
*/
-#if defined(VGA_ppc64le)
+# if defined(VGA_ppc64le)
Int ox = o;
-#else
+# else
Int ox = sz == 8 ? o : (o - 4);
-#endif
+# endif
if (ox == GOF(GPR0)) return ox;
if (ox == GOF(GPR1)) return ox;
if (ox == GOF(GPR2)) return ox;
@@ -240,11 +240,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(VSR31) && sz == 8) return o;
/* For the various byte sized XER/CR pieces, use offset 8
- in VSR0 .. VSR19. */
+ in VSR0 .. VSR21. */
tl_assert(SZB(VSR0) == 16);
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);
@@ -388,6 +390,7 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(IP_AT_SYSCALL) && sz == 4) return -1; /* slot unused */
if (o == GOF(FPROUND) && sz == 1) return -1;
if (o == GOF(DFPROUND) && sz == 1) return -1;
+ if (o == GOF(C_FPCC) && sz == 1) return -1;
if (o == GOF(VRSAVE) && sz == 4) return -1;
if (o == GOF(EMNOTE) && sz == 4) return -1;
if (o == GOF(CMSTART) && sz == 4) return -1;
@@ -440,11 +443,13 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
if (o == GOF(VSR31) && sz == 8) return o;
/* For the various byte sized XER/CR pieces, use offset 8
- in VSR0 .. VSR19. */
+ in VSR0 .. VSR21. */
tl_assert(SZB(VSR0) == 16);
if (o == GOF(XER_SO) && sz == 1) return 8 +GOF(VSR0);
if (o == GOF(XER_OV) && sz == 1) return 8 +GOF(VSR1);
+ if (o == GOF(XER_OV32) && sz == 1) return 8 +GOF(VSR20);
if (o == GOF(XER_CA) && sz == 1) return 8 +GOF(VSR2);
+ if (o == GOF(XER_CA32) && sz == 1) return 8 +GOF(VSR21);
if (o == GOF(XER_BC) && sz == 1) return 8 +GOF(VSR3);
if (o == GOF(CR0_321) && sz == 1) return 8 +GOF(VSR4);


@@ -0,0 +1,654 @@
commit a0d97e88ec6d71239d30a5a4b2b129e094150873
Author: Mark Wielaard <mark@klomp.org>
Date: Thu Dec 6 20:52:22 2018 +0100
Bug 401822 Fix asm constraints for ppc64 jm-vmx jm-insns.c test.
The mfvscr and vor instructions in jm-insns.c had a "=vr" constraint.
This should have been an "=v" constraint. This resolves assembler
warnings and the testcase failure on ppc64le with gcc 8.2 and
binutils 2.30.
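For context, a sketch of the difference (assumed GCC constraint
semantics, not part of the patch): a multi-letter constraint string
lists alternatives, so "=vr" permits either an AltiVec register ("v")
or a general-purpose register ("r"), and mfvscr/vor cannot encode a
GPR operand.

#include <altivec.h>   /* ppc only; build with -maltivec */

static vector unsigned int read_vscr(void)
{
   vector unsigned int vscr;
   /* "=v" forces an AltiVec register, which mfvscr requires. With
      "=vr" the compiler was free to pick a GPR, producing the
      assembler warnings mentioned above. */
   __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
   return vscr;
}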
diff --git a/none/tests/ppc32/jm-insns.c b/none/tests/ppc32/jm-insns.c
index e1a7da9..be02425 100644
--- a/none/tests/ppc32/jm-insns.c
+++ b/none/tests/ppc32/jm-insns.c
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
for (i=0; i<nb_viargs; i++) {
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
vec_in = (vector unsigned int)viargs[i];
vec_out = (vector unsigned int){ 0,0,0,0 };
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
diff --git a/none/tests/ppc64/jm-insns.c b/none/tests/ppc64/jm-insns.c
index e1a7da9..be02425 100644
--- a/none/tests/ppc64/jm-insns.c
+++ b/none/tests/ppc64/jm-insns.c
@@ -6269,7 +6269,7 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
for (i=0; i<nb_viargs; i++) {
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
vec_in = (vector unsigned int)viargs[i];
vec_out = (vector unsigned int){ 0,0,0,0 };
@@ -6287,11 +6287,11 @@ static void test_av_int_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6333,7 +6333,7 @@ static void test_av_int_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6349,11 +6349,11 @@ static void test_av_int_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6401,7 +6401,7 @@ static void test_av_int_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6418,11 +6418,11 @@ static void test_av_int_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6475,7 +6475,7 @@ static void vs128_cb (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6491,11 +6491,11 @@ static void vs128_cb (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6548,7 +6548,7 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6563,11 +6563,11 @@ static void vsplt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6615,7 +6615,7 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6627,11 +6627,11 @@ static void vspltis_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6677,7 +6677,7 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6693,11 +6693,11 @@ static void vsldoi_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6750,7 +6750,7 @@ static void lvs_cb (const char *name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6762,11 +6762,11 @@ static void lvs_cb (const char *name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6870,7 +6870,7 @@ static void test_av_int_ld_two_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6882,11 +6882,11 @@ static void test_av_int_ld_two_regs (const char *name,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -6956,7 +6956,7 @@ static void test_av_int_st_three_regs (const char *name,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -6974,7 +6974,7 @@ static void test_av_int_st_three_regs (const char *name,
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7037,7 +7037,7 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7052,11 +7052,11 @@ static void test_av_float_one_arg (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7099,7 +7099,7 @@ static void test_av_float_two_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7115,11 +7115,11 @@ static void test_av_float_two_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7168,7 +7168,7 @@ static void test_av_float_three_args (const char* name, test_func_t func,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7185,11 +7185,11 @@ static void test_av_float_three_args (const char* name, test_func_t func,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));
@@ -7276,7 +7276,7 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
/* Save flags */
__asm__ __volatile__ ("mfcr %0" : "=r" (tmpcr));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (tmpvscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (tmpvscr));
// reset VSCR and CR
vscr = (vector unsigned int){ 0,0,0,DEFAULT_VSCR };
@@ -7291,11 +7291,11 @@ static void vcvt_cb (const char* name, test_func_t func_IN,
(*func)();
// retrieve output <- r17
- __asm__ __volatile__ ("vor %0,17,17" : "=vr" (vec_out));
+ __asm__ __volatile__ ("vor %0,17,17" : "=v" (vec_out));
// get CR,VSCR flags
__asm__ __volatile__ ("mfcr %0" : "=r" (flags));
- __asm__ __volatile__ ("mfvscr %0" : "=vr" (vscr));
+ __asm__ __volatile__ ("mfvscr %0" : "=v" (vscr));
/* Restore flags */
__asm__ __volatile__ ("mtcr %0" : : "r" (tmpcr));


@@ -0,0 +1,12 @@
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index c24db91..1e770b3 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -8022,6 +8022,7 @@ static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
use info. */
switch (at->tag) {
case Iex_GSPTR:
+ case Iex_VECRET:
case Iex_Const:
return;
case Iex_RdTmp: {


@@ -0,0 +1,453 @@
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:28:42 2018 +0100
Add Memcheck support for IROps added in 42719898.
memcheck/mc_translate.c:
Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.
doCmpORD: for the cases of a signed comparison against zero, compute
definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
bits, do it exactly accurately.
expensiveCountTrailingZeroes: no functional change. Re-analyse/verify and add
comments.
expensiveCountLeadingZeroes: add. Very similar to
expensiveCountTrailingZeroes.
Add some comments to mark unary ops which are self-shadowing.
Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.
Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.
memcheck/tests/vbit-test/irops.c:
Add dummy new entries for all new IROps, just enough to make it compile and
run.
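A plain-C model of the Right operation may help (a sketch of the
semantics only; the patch itself builds IR): every 1-bit is smeared
rightwards, so the result has ones from the leftmost 1 of the input
down to bit 0.

static unsigned int right32(unsigned int x)
{
   /* mirrors mkRight32: a1 |= (a1 >>u i) for i = 1,2,4,8,16 */
   for (int i = 1; i <= 16; i *= 2)
      x |= x >> i;
   return x;
}

/* Example: right32(0x00100000) == 0x001fffff, and
   ~(right32(x) >> 1) then covers exactly the leading zeroes plus the
   leftmost 1 -- the only input bits that can influence clz(x). */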
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 68a2ab3..c24db91 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
+/* --------- The Right-family of operations. --------- */
+
+/* Unfortunately these are a lot more expensive then their Left
+ counterparts. Fortunately they are only very rarely used -- only for
+ count-leading-zeroes instrumentation. */
+
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
+{
+ for (Int i = 1; i <= 16; i *= 2) {
+ // a1 |= (a1 >>u i)
+ IRAtom* tmp
+ = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
+ a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
+ }
+ return a1;
+}
+
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
+{
+ for (Int i = 1; i <= 32; i *= 2) {
+ // a1 |= (a1 >>u i)
+ IRAtom* tmp
+ = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
+ a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
+ }
+ return a1;
+}
+
/* --------- 'Improvement' functions for AND/OR. --------- */
/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv* mce,
IRAtom* xxhash, IRAtom* yyhash,
IRAtom* xx, IRAtom* yy )
{
- Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
- Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
- IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
- IROp opAND = m64 ? Iop_And64 : Iop_And32;
- IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
- IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
- IRType ty = m64 ? Ity_I64 : Ity_I32;
- Int width = m64 ? 64 : 32;
+ Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
+ Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
+ IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
+ IROp opAND = m64 ? Iop_And64 : Iop_And32;
+ IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
+ IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
+ IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
+ IRType ty = m64 ? Ity_I64 : Ity_I32;
+ Int width = m64 ? 64 : 32;
Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
- IRAtom* threeLeft1 = NULL;
- IRAtom* sevenLeft1 = NULL;
-
tl_assert(isShadowAtom(mce,xxhash));
tl_assert(isShadowAtom(mce,yyhash));
tl_assert(isOriginalAtom(mce,xx));
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv* mce,
/* fancy interpretation */
/* if yy is zero, then it must be fully defined (zero#). */
tl_assert(isZero(yyhash));
- threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
+ // This is still inaccurate, but I don't think it matters, since
+ // nobody writes code of the form
+ // "is <partially-undefined-value> signedly greater than zero?".
+ // We therefore simply declare "x >s 0" to be undefined if any bit in
+ // x is undefined. That's clearly suboptimal in some cases. Eg, if
+ // the highest order bit is a defined 1 then x is negative so it
+ // doesn't matter whether the remaining bits are defined or not.
+ IRAtom* t_0_gt_0_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opAND,
+ mkPCastTo(mce,ty, xxhash),
+ m64 ? mkU64(1<<2) : mkU32(1<<2)
+ ));
+ // For "x <s 0", we can just copy the definedness of the top bit of x
+ // and we have a precise result.
+ IRAtom* t_lt_0_0_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opSHL,
+ assignNew(
+ 'V', mce,ty,
+ binop(opSHR, xxhash, mkU8(width-1))),
+ mkU8(3)
+ ));
+ // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
+ IRAtom* t_0_0_eq_0
+ = assignNew(
+ 'V', mce,ty,
+ binop(
+ opSHL,
+ assignNew('V', mce,ty,
+ unop(
+ op1UtoWS,
+ expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
+ ),
+ mkU8(1)
+ ));
return
binop(
opOR,
- assignNew(
- 'V', mce,ty,
- binop(
- opAND,
- mkPCastTo(mce,ty, xxhash),
- threeLeft1
- )),
- assignNew(
- 'V', mce,ty,
- binop(
- opSHL,
- assignNew(
- 'V', mce,ty,
- binop(opSHR, xxhash, mkU8(width-1))),
- mkU8(3)
- ))
- );
+ assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
+ t_0_0_eq_0
+ );
} else {
/* standard interpretation */
- sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
+ IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
return
binop(
opAND,
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
tl_assert(sameKindedAtoms(atom,vatom));
switch (czop) {
- case Iop_Ctz32:
+ case Iop_Ctz32: case Iop_CtzNat32:
ty = Ity_I32;
xorOp = Iop_Xor32;
subOp = Iop_Sub32;
andOp = Iop_And32;
one = mkU32(1);
break;
- case Iop_Ctz64:
+ case Iop_Ctz64: case Iop_CtzNat64:
ty = Ity_I64;
xorOp = Iop_Xor64;
subOp = Iop_Sub64;
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
// improver = atom ^ (atom - 1)
//
- // That is, improver has its low ctz(atom) bits equal to one;
- // higher bits (if any) equal to zero.
+ // That is, improver has its low ctz(atom)+1 bits equal to one;
+ // higher bits (if any) equal to zero. So it's exactly the right
+ // mask to use to remove the irrelevant undefined input bits.
+ /* Here are some examples:
+ atom = U...U 1 0...0
+ atom-1 = U...U 0 1...1
+ ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
+ actually influence the result
+ A boundary case
+ atom = 0...0
+ atom-1 = 1...1
+ ^ed = 11111, also a correct mask for the input: all input bits
+ are relevant
+ Another boundary case
+ atom = 1..1 1
+ atom-1 = 1..1 0
+ ^ed = 0..0 1, also a correct mask: only the rightmost input bit
+ is relevant
+ Now with misc U bits interspersed:
+ atom = U...U 1 0 U...U 0 1 0...0
+ atom-1 = U...U 1 0 U...U 0 0 1...1
+ ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
+ (Per re-check/analysis of 14 Nov 2018)
+ */
improver = assignNew('V', mce,ty,
binop(xorOp,
atom,
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
// improved = vatom & improver
//
- // That is, treat any V bits above the first ctz(atom) bits as
- // "defined".
+ // That is, treat any V bits to the left of the rightmost ctz(atom)+1
+ // bits as "defined".
+ improved = assignNew('V', mce, ty,
+ binop(andOp, vatom, improver));
+
+ // Return pessimizing cast of improved.
+ return mkPCastTo(mce, ty, improved);
+}
+
+static
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
+ IRAtom* atom, IRAtom* vatom )
+{
+ IRType ty;
+ IROp shrOp, notOp, andOp;
+ IRAtom* (*mkRight)(MCEnv*, IRAtom*);
+ IRAtom *improver, *improved;
+ tl_assert(isShadowAtom(mce,vatom));
+ tl_assert(isOriginalAtom(mce,atom));
+ tl_assert(sameKindedAtoms(atom,vatom));
+
+ switch (czop) {
+ case Iop_Clz32: case Iop_ClzNat32:
+ ty = Ity_I32;
+ shrOp = Iop_Shr32;
+ notOp = Iop_Not32;
+ andOp = Iop_And32;
+ mkRight = mkRight32;
+ break;
+ case Iop_Clz64: case Iop_ClzNat64:
+ ty = Ity_I64;
+ shrOp = Iop_Shr64;
+ notOp = Iop_Not64;
+ andOp = Iop_And64;
+ mkRight = mkRight64;
+ break;
+ default:
+ ppIROp(czop);
+ VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
+ }
+
+ // This is in principle very similar to how expensiveCountTrailingZeroes
+ // works. That function computed an "improver", which it used to mask
+ // off all but the rightmost 1-bit and the zeroes to the right of it,
+ // hence removing irrelevant bits from the input. Here, we play the
+ // exact same game but with the left-vs-right roles interchanged.
+ // Unfortunately calculation of the improver in this case is
+ // significantly more expensive.
+ //
+ // improver = ~(RIGHT(atom) >>u 1)
+ //
+ // That is, improver has its upper clz(atom)+1 bits equal to one;
+ // lower bits (if any) equal to zero. So it's exactly the right
+ // mask to use to remove the irrelevant undefined input bits.
+ /* Here are some examples:
+ atom = 0...0 1 U...U
+ R(atom) = 0...0 1 1...1
+ R(atom) >>u 1 = 0...0 0 1...1
+ ~(R(atom) >>u 1) = 1...1 1 0...0
+ which correctly describes which bits of |atom|
+ actually influence the result
+ A boundary case
+ atom = 0...0
+ R(atom) = 0...0
+ R(atom) >>u 1 = 0...0
+ ~(R(atom) >>u 1) = 1...1
+ also a correct mask for the input: all input bits
+ are relevant
+ Another boundary case
+ atom = 1 1..1
+ R(atom) = 1 1..1
+ R(atom) >>u 1 = 0 1..1
+ ~(R(atom) >>u 1) = 1 0..0
+ also a correct mask: only the leftmost input bit
+ is relevant
+ Now with misc U bits interspersed:
+ atom = 0...0 1 U...U 0 1 U...U
+ R(atom) = 0...0 1 1...1 1 1 1...1
+ R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
+ ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
+ (Per initial implementation of 15 Nov 2018)
+ */
+ improver = mkRight(mce, atom);
+ improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
+ improver = assignNew('V', mce, ty, unop(notOp, improver));
+
+ // improved = vatom & improver
+ //
+ // That is, treat any V bits to the right of the leftmost clz(atom)+1
+ // bits as "defined".
improved = assignNew('V', mce, ty,
binop(andOp, vatom, improver));
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_RecipEst32F0x4:
return unary32F0x4(mce, vatom);
+ // These are self-shadowing.
case Iop_32UtoV128:
case Iop_64UtoV128:
case Iop_Dup8x16:
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_MulI128by10Carry:
case Iop_F16toF64x2:
case Iop_F64toF16x2:
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
return vatom;
case Iop_I32StoF128: /* signed I32 -> F128 */
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_RoundF64toF64_NegINF:
case Iop_RoundF64toF64_PosINF:
case Iop_RoundF64toF64_ZERO:
- case Iop_Clz64:
case Iop_D32toD64:
case Iop_I32StoD64:
case Iop_I32UtoD64:
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_D64toD128:
return mkPCastTo(mce, Ity_I128, vatom);
- case Iop_Clz32:
case Iop_TruncF64asF32:
case Iop_NegF32:
case Iop_AbsF32:
case Iop_F16toF32:
return mkPCastTo(mce, Ity_I32, vatom);
- case Iop_Ctz32:
- case Iop_Ctz64:
+ case Iop_Ctz32: case Iop_CtzNat32:
+ case Iop_Ctz64: case Iop_CtzNat64:
return expensiveCountTrailingZeroes(mce, op, atom, vatom);
+ case Iop_Clz32: case Iop_ClzNat32:
+ case Iop_Clz64: case Iop_ClzNat64:
+ return expensiveCountLeadingZeroes(mce, op, atom, vatom);
+
+ // PopCount32: this is slightly pessimistic. It is true that the
+ // result depends on all input bits, so that aspect of the PCast is
+ // correct. However, regardless of the input, only the lowest 5 bits
+ // out of the output can ever be undefined. So we could actually
+ // "improve" the results here by marking the top 27 bits of output as
+ // defined. A similar comment applies for PopCount64.
+ case Iop_PopCount32:
+ return mkPCastTo(mce, Ity_I32, vatom);
+ case Iop_PopCount64:
+ return mkPCastTo(mce, Ity_I64, vatom);
+
+ // These are self-shadowing.
case Iop_1Uto64:
case Iop_1Sto64:
case Iop_8Uto64:
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_V256to64_2: case Iop_V256to64_3:
return assignNew('V', mce, Ity_I64, unop(op, vatom));
+ // These are self-shadowing.
case Iop_64to32:
case Iop_64HIto32:
case Iop_1Uto32:
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_16Sto32:
case Iop_8Sto32:
case Iop_V128to32:
+ case Iop_Reverse8sIn32_x1:
return assignNew('V', mce, Ity_I32, unop(op, vatom));
+ // These are self-shadowing.
case Iop_8Sto16:
case Iop_8Uto16:
case Iop_32to16:
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_GetMSBs8x16:
return assignNew('V', mce, Ity_I16, unop(op, vatom));
+ // These are self-shadowing.
case Iop_1Uto8:
case Iop_1Sto8:
case Iop_16to8:
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_Not16:
case Iop_Not8:
case Iop_Not1:
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
return vatom;
case Iop_CmpNEZ8x8:
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
case Iop_Ctz64x2:
return mkPCast64x2(mce, vatom);
+ // This is self-shadowing.
case Iop_PwBitMtxXpose64x2:
return assignNew('V', mce, Ity_V128, unop(op, vatom));
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
index bfd82fc..e8bf67d 100644
--- a/memcheck/tests/vbit-test/irops.c
+++ b/memcheck/tests/vbit-test/irops.c
@@ -111,6 +111,12 @@ static irop_t irops[] = {
{ DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
{ DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
{ DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
+ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
+ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
{ DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
{ DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
{ DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
@@ -336,6 +342,7 @@ static irop_t irops[] = {
{ DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
{ DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
{ DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
+ { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
/* ------------------ 64-bit SIMD FP ------------------------ */
{ DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
{ DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },


@@ -0,0 +1,124 @@
commit 4271989815b5fc933c1e29bc75507c2726dc3738
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 10:52:33 2018 +0100
Add some new IROps to support improved Memcheck analysis of strlen etc.
This is part of the fix for bug 386945. It adds the following IROps, plus
their supporting type- and printing- fragments:
Iop_Reverse8sIn32_x1: 32-bit byteswap. A fancy name, but it is consistent
with naming for the other swapping IROps that already exist.
Iop_PopCount64, Iop_PopCount32: population count
Iop_ClzNat64, Iop_ClzNat32, Iop_CtzNat64, Iop_CtzNat32: counting leading and
trailing zeroes, with "natural" (Nat) semantics for a zero input, meaning, in
the case of zero input, return the number of bits in the word. These
functionally overlap with the existing Iop_Clz64, Iop_Clz32, Iop_Ctz64,
Iop_Ctz32. The existing operations are undefined in case of a zero input.
Adding these new variants avoids the complexity of having to change the
declared semantics of the existing operations. Instead they are deprecated
but still available for use.
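As a reference for the intended semantics (a sketch, not the VEX
implementation), the Nat variants return the word width for a zero
input, and PopCount counts set bits:

static int clz_nat32(unsigned int x)
{
   int n = 0;
   if (x == 0)
      return 32;                  /* "natural" result for zero input */
   while (!(x & 0x80000000u)) {   /* walk until the leading 1 is at bit 31 */
      x <<= 1;
      n++;
   }
   return n;
}

static int popcount32(unsigned int x)
{
   int n = 0;
   for (; x != 0; x &= x - 1)     /* clear the lowest set bit each round */
      n++;
   return n;
}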
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 823b6be..3221033 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -194,6 +194,14 @@ void ppIROp ( IROp op )
case Iop_Ctz64: vex_printf("Ctz64"); return;
case Iop_Ctz32: vex_printf("Ctz32"); return;
+ case Iop_ClzNat64: vex_printf("ClzNat64"); return;
+ case Iop_ClzNat32: vex_printf("ClzNat32"); return;
+ case Iop_CtzNat64: vex_printf("CtzNat64"); return;
+ case Iop_CtzNat32: vex_printf("CtzNat32"); return;
+
+ case Iop_PopCount64: vex_printf("PopCount64"); return;
+ case Iop_PopCount32: vex_printf("PopCount32"); return;
+
case Iop_CmpLT32S: vex_printf("CmpLT32S"); return;
case Iop_CmpLE32S: vex_printf("CmpLE32S"); return;
case Iop_CmpLT32U: vex_printf("CmpLT32U"); return;
@@ -395,6 +403,7 @@ void ppIROp ( IROp op )
case Iop_CmpNEZ16x2: vex_printf("CmpNEZ16x2"); return;
case Iop_CmpNEZ8x4: vex_printf("CmpNEZ8x4"); return;
+ case Iop_Reverse8sIn32_x1: vex_printf("Reverse8sIn32_x1"); return;
case Iop_CmpF64: vex_printf("CmpF64"); return;
@@ -2719,6 +2728,7 @@ void typeOfPrimop ( IROp op,
UNARY(Ity_I16, Ity_I16);
case Iop_Not32:
case Iop_CmpNEZ16x2: case Iop_CmpNEZ8x4:
+ case Iop_Reverse8sIn32_x1:
UNARY(Ity_I32, Ity_I32);
case Iop_Not64:
@@ -2782,9 +2792,13 @@ void typeOfPrimop ( IROp op,
BINARY(Ity_I64,Ity_I64, Ity_I128);
case Iop_Clz32: case Iop_Ctz32:
+ case Iop_ClzNat32: case Iop_CtzNat32:
+ case Iop_PopCount32:
UNARY(Ity_I32, Ity_I32);
case Iop_Clz64: case Iop_Ctz64:
+ case Iop_ClzNat64: case Iop_CtzNat64:
+ case Iop_PopCount64:
UNARY(Ity_I64, Ity_I64);
case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 17bcb55..93fa5ac 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -452,12 +452,21 @@ typedef
Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
- /* Wierdo integer stuff */
+ /* Counting bits */
+ /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of zero.
+ You must ensure they are never given a zero argument. As of
+ 2018-Nov-14 they are deprecated. Try to use the Nat variants
+ immediately below, if you can.
+ */
Iop_Clz64, Iop_Clz32, /* count leading zeroes */
Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */
- /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
- zero. You must ensure they are never given a zero argument.
- */
+ /* Count leading/trailing zeroes, with "natural" semantics for the
+ case where the input is zero: then the result is the number of bits
+ in the word. */
+ Iop_ClzNat64, Iop_ClzNat32,
+ Iop_CtzNat64, Iop_CtzNat32,
+ /* Population count -- compute the number of 1 bits in the argument. */
+ Iop_PopCount64, Iop_PopCount32,
/* Standard integer comparisons */
Iop_CmpLT32S, Iop_CmpLT64S,
@@ -831,6 +840,9 @@ typedef
/* MISC (vector integer cmp != 0) */
Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
+ /* Byte swap in a 32-bit word */
+ Iop_Reverse8sIn32_x1,
+
/* ------------------ 64-bit SIMD FP ------------------------ */
/* Convertion to/from int */
@@ -1034,8 +1046,9 @@ typedef
Iop_Slice64, // (I64, I64, I8) -> I64
/* REVERSE the order of chunks in vector lanes. Chunks must be
- smaller than the vector lanes (obviously) and so may be 8-,
- 16- and 32-bit in size. */
+ smaller than the vector lanes (obviously) and so may be 8-, 16- and
+ 32-bit in size. Note that the degenerate case,
+ Iop_Reverse8sIn64_x1, is a simply a vanilla byte-swap. */
/* Examples:
Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]


@@ -0,0 +1,256 @@
From 2c1f016e634bf79faf45e81c14c955c711bc202f Mon Sep 17 00:00:00 2001
From: Mark Wielaard <mark@klomp.org>
Date: Mon, 31 Dec 2018 22:26:31 +0100
Subject: [PATCH] Bug 402519 - POWER 3.0 addex instruction incorrectly
implemented
addex uses OV as carry in and carry out. For all other instructions
OV is the signed overflow flag, and instructions like adde use CA
as the carry.
Replace set_XER_OV_OV32 with set_XER_OV_OV32_ADDEX, which calls
calculate_XER_CA_64 and calculate_XER_CA_32, but with OV as the
carry input, and sets OV and OV32.
Enable test_addex in none/tests/ppc64/test_isa_3_0.c and update
the expected output. test_addex would fail to match the expected
output before this patch.
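A small model of the corrected behaviour (an illustrative sketch, not
the VEX code): addex performs an add with OV as both carry-in and
carry-out, the same role CA plays for adde.

static unsigned long addex_model(unsigned long a, unsigned long b,
                                 unsigned *ov /* carry in and out */)
{
   unsigned long sum = a + b + *ov;
   /* carry-out of a + b + carry-in, computed without widening */
   *ov = (sum < a) || (*ov && sum == a);
   return sum;
}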
---
NEWS | 1 +
VEX/priv/guest_ppc_toIR.c | 52 ++++++++++++++---------
none/tests/ppc64/test_isa_3_0.c | 3 +-
none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE | 36 ++++++++++------
4 files changed, 58 insertions(+), 34 deletions(-)
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 18df822..d685383 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -2645,21 +2645,6 @@ static void copy_OV_to_OV32( void ) {
putXER_OV32( getXER_OV() );
}
-static void set_XER_OV_OV32 ( IRType ty, UInt op, IRExpr* res,
- IRExpr* argL, IRExpr* argR )
-{
- if (ty == Ity_I32) {
- set_XER_OV_OV32_32( op, res, argL, argR );
- } else {
- IRExpr* xer_ov_32;
- set_XER_OV_64( op, res, argL, argR );
- xer_ov_32 = calculate_XER_OV_32( op, unop(Iop_64to32, res),
- unop(Iop_64to32, argL),
- unop(Iop_64to32, argR));
- putXER_OV32( unop(Iop_32to8, xer_ov_32) );
- }
-}
-
static void set_XER_OV_OV32_SO ( IRType ty, UInt op, IRExpr* res,
IRExpr* argL, IRExpr* argR )
{
@@ -3005,6 +2990,33 @@ static void set_XER_CA_CA32 ( IRType ty, UInt op, IRExpr* res,
}
}
+/* Used only by addex instruction, which uses and sets OV as carry. */
+static void set_XER_OV_OV32_ADDEX ( IRType ty, IRExpr* res,
+ IRExpr* argL, IRExpr* argR,
+ IRExpr* old_ov )
+{
+ if (ty == Ity_I32) {
+ IRTemp xer_ov = newTemp(Ity_I32);
+ assign ( xer_ov, unop(Iop_32to8,
+ calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
+ res, argL, argR, old_ov ) ) );
+ putXER_OV( mkexpr (xer_ov) );
+ putXER_OV32( mkexpr (xer_ov) );
+ } else {
+ IRExpr *xer_ov;
+ IRExpr* xer_ov_32;
+ xer_ov = calculate_XER_CA_64( PPCG_FLAG_OP_ADDE,
+ res, argL, argR, old_ov );
+ putXER_OV( unop(Iop_32to8, xer_ov) );
+ xer_ov_32 = calculate_XER_CA_32( PPCG_FLAG_OP_ADDE,
+ unop(Iop_64to32, res),
+ unop(Iop_64to32, argL),
+ unop(Iop_64to32, argR),
+ unop(Iop_64to32, old_ov) );
+ putXER_OV32( unop(Iop_32to8, xer_ov_32) );
+ }
+}
+
/*------------------------------------------------------------*/
@@ -5094,16 +5106,18 @@ static Bool dis_int_arith ( UInt theInstr )
}
case 0xAA: {// addex (Add Extended alternate carry bit Z23-form)
+ IRTemp old_xer_ov = newTemp(ty);
DIP("addex r%u,r%u,r%u,%d\n", rD_addr, rA_addr, rB_addr, (Int)flag_OE);
+ assign( old_xer_ov, mkWidenFrom32(ty, getXER_OV_32(), False) );
assign( rD, binop( mkSzOp(ty, Iop_Add8), mkexpr(rA),
binop( mkSzOp(ty, Iop_Add8), mkexpr(rB),
- mkWidenFrom8( ty, getXER_OV(), False ) ) ) );
+ mkexpr(old_xer_ov) ) ) );
/* CY bit is same as OE bit */
if (flag_OE == 0) {
- /* Exception, do not set SO bit */
- set_XER_OV_OV32( ty, PPCG_FLAG_OP_ADDE,
- mkexpr(rD), mkexpr(rA), mkexpr(rB) );
+ /* Exception, do not set SO bit and set OV from carry. */
+ set_XER_OV_OV32_ADDEX( ty, mkexpr(rD), mkexpr(rA), mkexpr(rB),
+ mkexpr(old_xer_ov) );
} else {
/* CY=1, 2 and 3 (AKA flag_OE) are reserved */
vex_printf("addex instruction, CY = %d is reserved.\n", flag_OE);
diff --git a/none/tests/ppc64/test_isa_3_0.c b/none/tests/ppc64/test_isa_3_0.c
index 2d13505..1c2cda3 100644
--- a/none/tests/ppc64/test_isa_3_0.c
+++ b/none/tests/ppc64/test_isa_3_0.c
@@ -286,7 +286,7 @@ static test_list_t testgroup_ia_ops_two[] = {
{ &test_moduw, "moduw" },
{ &test_modsd, "modsd" },
{ &test_modud, "modud" },
- //{ &test_addex, "addex" },
+ { &test_addex, "addex" },
{ NULL , NULL },
};
@@ -2741,7 +2741,6 @@ static void testfunction_gpr_vector_logical_one (const char* instruction_name,
* rt, xa
*/
int i;
- int t;
volatile HWord_t res;
VERBOSE_FUNCTION_CALLOUT
diff --git a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
index 152ff28..cc0e88e 100644
--- a/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
+++ b/none/tests/ppc64/test_isa_3_0_other.stdout.exp-LE
@@ -40,7 +40,17 @@ modud ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000)
modud ffffffffffffffff, 0000001cbe991def => 000000043eb0c0b2 (00000000)
modud ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
-All done. Tested 4 different instructions
+addex 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
+addex 0000000000000000, 0000001cbe991def => 0000001cbe991def (00000000)
+addex 0000000000000000, ffffffffffffffff => ffffffffffffffff (00000000)
+addex 0000001cbe991def, 0000000000000000 => 0000001cbe991def (00000000)
+addex 0000001cbe991def, 0000001cbe991def => 000000397d323bde (00000000) OV32
+addex 0000001cbe991def, ffffffffffffffff => 0000001cbe991dee (00000000) OV OV32
+addex ffffffffffffffff, 0000000000000000 => 0000000000000000 (00000000) OV OV32
+addex ffffffffffffffff, 0000001cbe991def => 0000001cbe991def (00000000) OV OV32
+addex ffffffffffffffff, ffffffffffffffff => ffffffffffffffff (00000000) OV OV32
+
+All done. Tested 5 different instructions
ppc one argument plus shift:
Test instruction group [ppc one argument plus shift]
extswsli aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff
@@ -85,7 +95,7 @@ extswsli. aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => aaaaaaaaaaaaaa
extswsli. 5152535455565758 5152535455565758 0 ffaa5599113377cc => 5152535455565758 5152535455565758 0 ffaa5599113377cc
extswsli. 0000000000000000 0000000000000000 0 ffaa5599113377cc => 0000000000000000 0000000000000000 0 ffaa5599113377cc
-All done. Tested 6 different instructions
+All done. Tested 7 different instructions
ppc three parameter ops:
Test instruction group [ppc three parameter ops]
maddhd 0000000000000000, 0000000000000000, 0000000000000000 => 0000000000000000 (00000000)
@@ -172,7 +182,7 @@ maddld ffffffffffffffff, ffffffffffffffff, 0000000000000000 => 000000000000000
maddld ffffffffffffffff, ffffffffffffffff, 0000001cbe991def => 0000001cbe991df0 (00000000)
maddld ffffffffffffffff, ffffffffffffffff, ffffffffffffffff => 0000000000000000 (00000000)
-All done. Tested 9 different instructions
+All done. Tested 10 different instructions
ppc count zeros:
Test instruction group [ppc count zeros]
cnttzw 0000000000000000 => 0000000000000020
@@ -197,7 +207,7 @@ cnttzd. 0000001cbe991def => 0000000000000000 Expected cr0 to be zero, it is (200
cnttzd. ffffffffffffffff => 0000000000000000 Expected cr0 to be zero, it is (20000000)
-All done. Tested 13 different instructions
+All done. Tested 14 different instructions
ppc set boolean:
Test instruction group [ppc set boolean]
setb cr_field:0 cr_value::00000000 => 0000000000000000
@@ -265,7 +275,7 @@ setb cr_field:7 cr_value::00000005 => 0000000000000001
setb cr_field:7 cr_value::00000006 => 0000000000000001
setb cr_field:7 cr_value::00000007 => 0000000000000001
-All done. Tested 14 different instructions
+All done. Tested 15 different instructions
ppc char compare:
Test instruction group [ppc char compare]
cmprb l=0 0x61 (a) (cmpeq:0x5b427b625a417a61) (cmprb:src22(a-z) src21(A-Z)) => in range/found
@@ -1711,7 +1721,7 @@ cmpeqb 0x5d (]) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
cmpeqb 0x60 (`) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
cmpeqb 0x5f (_) (cmpeq:0x4642666245416561) (cmprb:src22(a-e) src21(A-E)) =>
-All done. Tested 17 different instructions
+All done. Tested 18 different instructions
ppc vector scalar move to/from:
Test instruction group [ppc vector scalar move to/from]
mfvsrld aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffffffffffffffff => aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa ffffffffffffffff
@@ -1777,7 +1787,7 @@ mtvsrws aaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaa 0 ffaa5599113377cc => 113377cc113377cc
mtvsrws 5152535455565758 5152535455565758 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
mtvsrws 0000000000000000 0000000000000000 0 ffaa5599113377cc => 113377cc113377cc 113377cc113377cc 0 ffaa5599113377cc
-All done. Tested 20 different instructions
+All done. Tested 21 different instructions
ppc dfp significance:
Test instruction group [ppc dfp significance]
dtstsfi significance(0x00) +Finite 0 * 10 ^ -12 (GT) (4)
@@ -1862,7 +1872,7 @@ dtstsfiq significance(0x20) -inf (GT) (4)
dtstsfiq significance(0x30) -inf (GT) (4)
dtstsfiq significance(0x3f) -inf (GT) (4)
-All done. Tested 22 different instructions
+All done. Tested 23 different instructions
ppc bcd misc:
Test instruction group [ppc bcd misc]
bcdadd. p0 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000000000000c (+|0) => (EQ) (2) xt:0000000000000000 000000000000000c(+|0)
@@ -33338,12 +33348,12 @@ bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:9999999999999999 99999
bcdcfsq. p1 xa:0000000000000000 000000000000000c (+|0) xb:0000000000000000 000000001234567d ( - ) => (GT) (4) xt:0000000000000000 000000305419901f(+|0)
-All done. Tested 51 different instructions
+All done. Tested 52 different instructions
ppc noop misc:
Test instruction group [ppc noop misc]
wait =>
-All done. Tested 52 different instructions
+All done. Tested 53 different instructions
ppc addpc_misc:
Test instruction group [ppc addpc_misc]
addpcis 0000000000000000 => 0000000000000000
@@ -33380,7 +33390,7 @@ subpcis 000000000000000d => 0000000000000000
subpcis 000000000000000e => 0000000000000000
subpcis 000000000000000f => 0000000000000000
-All done. Tested 54 different instructions
+All done. Tested 55 different instructions
ppc mffpscr:
Test instruction group [ppc mffpscr]
mffsce => 000000000.000000
@@ -33395,7 +33405,7 @@ mffs => 000000000.000000
fpscr: f14
local_fpscr:
-All done. Tested 57 different instructions
+All done. Tested 58 different instructions
ppc mffpscr:
Test instruction group [ppc mffpscr]
mffscdrni 0 => 0X0
@@ -33426,4 +33436,4 @@ mffscrn f15 0X1 => 0X200000000
mffscrn f15 0X2 => 0X200000000
fpscr: f14 local_fpscr: 30-DRN1 RN-bit62
-All done. Tested 61 different instructions
+All done. Tested 62 different instructions
--
1.8.3.1

View File

@@ -0,0 +1,381 @@
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:36:53 2018 +0100
ppc front end: use new IROps added in 42719898.
This pertains to bug 386945.
VEX/priv/guest_ppc_toIR.c:
gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
gen_vpopcntd_mode32: use Iop_PopCount32.
for cntlz{w,d}, use Iop_CtzNat{32,64}.
gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
verbose_Clz32: remove (was unused anyway).
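Where a single Iop_PopCount{32,64} cannot be used, gen_POPCOUNT still builds the classic mask-and-add reduction in IR. For reference, the same reduction for the full 64-bit case in plain C (a textbook sketch, not Valgrind code; the final multiply stands in for the remaining shift-and-add rounds of the IR tree):

    #include <stdint.h>

    /* Parallel popcount: pairwise sums of 1-, 2- and 4-bit fields, then a
       multiply to accumulate the per-byte counts into the top byte. */
    static unsigned popcount64(uint64_t n)
    {
        n = n - ((n >> 1) & 0x5555555555555555ULL);
        n = (n & 0x3333333333333333ULL) + ((n >> 2) & 0x3333333333333333ULL);
        n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
        return (unsigned)((n * 0x0101010101010101ULL) >> 56);
    }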
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index cb1cae1..8977d4f 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -1595,7 +1595,8 @@ typedef enum {
/* Generate an IR sequence to do a popcount operation on the supplied
IRTemp, and return a new IRTemp holding the result. 'ty' may be
Ity_I32 or Ity_I64 only. */
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
+ _popcount_data_type data_type )
{
/* Do count across 2^data_type bits,
byte: data_type = 3
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
vassert(ty == Ity_I64 || ty == Ity_I32);
+ // Use a single IROp in cases where we can.
+
+ if (ty == Ity_I64 && data_type == DWORD) {
+ IRTemp res = newTemp(Ity_I64);
+ assign(res, unop(Iop_PopCount64, mkexpr(src)));
+ return res;
+ }
+
+ if (ty == Ity_I32 && data_type == WORD) {
+ IRTemp res = newTemp(Ity_I32);
+ assign(res, unop(Iop_PopCount32, mkexpr(src)));
+ return res;
+ }
+
+ // For the rest, we have to do it the slow way.
+
if (ty == Ity_I32) {
for (idx = 0; idx < WORD; idx++) {
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
return nyu;
}
-// else, ty == Ity_I64
+ // else, ty == Ity_I64
vassert(mode64);
for (i = 0; i < DWORD; i++) {
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
*/
static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
{
- Int i, shift[6];
- IRTemp mask[6];
- IRTemp old = IRTemp_INVALID;
- IRTemp nyu1 = IRTemp_INVALID;
- IRTemp nyu2 = IRTemp_INVALID;
IRTemp retval = newTemp(Ity_I64);
vassert(!mode64);
- for (i = 0; i < WORD; i++) {
- mask[i] = newTemp(Ity_I32);
- shift[i] = 1 << i;
- }
- assign(mask[0], mkU32(0x55555555));
- assign(mask[1], mkU32(0x33333333));
- assign(mask[2], mkU32(0x0F0F0F0F));
- assign(mask[3], mkU32(0x00FF00FF));
- assign(mask[4], mkU32(0x0000FFFF));
- old = src1;
- for (i = 0; i < WORD; i++) {
- nyu1 = newTemp(Ity_I32);
- assign(nyu1,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu1;
- }
-
- old = src2;
- for (i = 0; i < WORD; i++) {
- nyu2 = newTemp(Ity_I32);
- assign(nyu2,
- binop(Iop_Add32,
- binop(Iop_And32,
- mkexpr(old),
- mkexpr(mask[i])),
- binop(Iop_And32,
- binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
- mkexpr(mask[i]))));
- old = nyu2;
- }
- assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
+ assign(retval,
+ unop(Iop_32Uto64,
+ binop(Iop_Add32,
+ unop(Iop_PopCount32, mkexpr(src1)),
+ unop(Iop_PopCount32, mkexpr(src2)))));
return retval;
}
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( result, unop( Iop_Ctz32,
+ assign( result, unop( Iop_CtzNat32,
unop( Iop_64to32, mkexpr( rS ) ) ) );
assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
rA_address, rS_address);
assign( rS, getIReg( rS_address ) );
- assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
+ assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
if ( flag_rC == 1 )
set_CR0( mkexpr( rA ) );
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
IRTemp rS = newTemp(ty);
IRTemp rA = newTemp(ty);
IRTemp rB = newTemp(ty);
- IRExpr* irx;
Bool do_rc = False;
assign( rS, getIReg(rS_addr) );
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
break;
case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
- IRExpr* lo32;
if (rB_addr!=0) {
vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
return False;
}
- DIP("cntlzw%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
+ DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
// mode64: count in low word only
- lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
-
- // Iop_Clz32 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE32, lo32, mkU32(0));
- assign(rA, mkWidenFrom32(ty,
- IRExpr_ITE( irx,
- unop(Iop_Clz32, lo32),
- mkU32(32)),
- False));
-
- // TODO: alternatively: assign(rA, verbose_Clz32(rS));
+ IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
+ IRExpr* res32 = unop(Iop_ClzNat32, lo32);
+ assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
break;
}
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
return False;
}
- DIP("cntlzd%s r%u,r%u\n",
- flag_rC ? ".":"", rA_addr, rS_addr);
- // Iop_Clz64 undefined for arg==0, so deal with that case:
- irx = binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
- assign(rA, IRExpr_ITE( irx,
- unop(Iop_Clz64, mkexpr(rS)),
- mkU64(64) ));
- // TODO: alternatively: assign(rA, verbose_Clz64(rS));
+ DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
+ assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
break;
case 0x1FC: // cmpb (Power6: compare bytes)
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
putFReg( rS_addr, mkexpr(frA));
return True;
}
- case 0x1FA: // popcntd (population count doubleword
+ case 0x1FA: // popcntd (population count doubleword)
{
+ vassert(mode64);
DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
putIReg( rA_addr, mkexpr(result) );
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
{
vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
- return
- binop(Iop_Or32,
- binop(Iop_Shl32, mkexpr(t), mkU8(24)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)),
- mkU32(0x00FF0000)),
- binop(Iop_Or32,
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
- mkU32(0x0000FF00)),
- binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
- mkU32(0x000000FF) )
- )));
+ return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
}
/* Generates code to swap the byte order in the lower half of an Ity_I32,
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
+ // JRS FIXME:
+ // * is the host_endness conditional below actually necessary?
+ // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
+ // That would be a lot more efficient.
IRExpr * nextAddr;
IRTemp w3 = newTemp( Ity_I32 );
IRTemp w4 = newTemp( Ity_I32 );
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
case 0x7C3: // vpopcntd
{
if (mode64) {
- /* Break vector into 64-bit double words and do the population count
- * on each double word.
+ /* Break vector into 64-bit double words and do the population
+ count on each double word.
*/
IRType ty = Ity_I64;
IRTemp bits0_63 = newTemp(Ity_I64);
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
mkexpr( cnt_bits0_63 ) ) );
} else {
/* Break vector into 32-bit words and do the population count
- * on each doubleword.
+ on each 32-bit word.
*/
IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
- IRTemp cnt_bits0_63 = newTemp(Ity_I64);
+ IRTemp cnt_bits0_63 = newTemp(Ity_I64);
IRTemp cnt_bits64_127 = newTemp(Ity_I64);
DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
- breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
+ breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
+ &bits32_63, &bits0_31 );
cnt_bits0_63 = gen_vpopcntd_mode32(bits0_31, bits32_63);
cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
/* Miscellaneous ISA 2.06 instructions */
case 0x1FA: // popcntd
+ if (!mode64) goto decode_failure;
+ /* else fallthru */
case 0x17A: // popcntw
case 0x7A: // popcntb
- if (dis_int_logic( theInstr )) goto decode_success;
- goto decode_failure;
+ if (dis_int_logic( theInstr )) goto decode_success;
+ goto decode_failure;
case 0x0FC: // bpermd
if (!mode64) goto decode_failure;
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB* irsb_IN,
return dres;
}
-
-/*------------------------------------------------------------*/
-/*--- Unused stuff ---*/
-/*------------------------------------------------------------*/
-
-///* A potentially more memcheck-friendly implementation of Clz32, with
-// the boundary case Clz32(0) = 32, which is what ppc requires. */
-//
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
-//{
-// /* Welcome ... to SSA R Us. */
-// IRTemp n1 = newTemp(Ity_I32);
-// IRTemp n2 = newTemp(Ity_I32);
-// IRTemp n3 = newTemp(Ity_I32);
-// IRTemp n4 = newTemp(Ity_I32);
-// IRTemp n5 = newTemp(Ity_I32);
-// IRTemp n6 = newTemp(Ity_I32);
-// IRTemp n7 = newTemp(Ity_I32);
-// IRTemp n8 = newTemp(Ity_I32);
-// IRTemp n9 = newTemp(Ity_I32);
-// IRTemp n10 = newTemp(Ity_I32);
-// IRTemp n11 = newTemp(Ity_I32);
-// IRTemp n12 = newTemp(Ity_I32);
-//
-// /* First, propagate the most significant 1-bit into all lower
-// positions in the word. */
-// /* unsigned int clz ( unsigned int n )
-// {
-// n |= (n >> 1);
-// n |= (n >> 2);
-// n |= (n >> 4);
-// n |= (n >> 8);
-// n |= (n >> 16);
-// return bitcount(~n);
-// }
-// */
-// assign(n1, mkexpr(arg));
-// assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
-// assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
-// assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
-// assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
-// assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
-// /* This gives a word of the form 0---01---1. Now invert it, giving
-// a word of the form 1---10---0, then do a population-count idiom
-// (to count the 1s, which is the number of leading zeroes, or 32
-// if the original word was 0. */
-// assign(n7, unop(Iop_Not32, mkexpr(n6)));
-//
-// /* unsigned int bitcount ( unsigned int n )
-// {
-// n = n - ((n >> 1) & 0x55555555);
-// n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
-// n = (n + (n >> 4)) & 0x0F0F0F0F;
-// n = n + (n >> 8);
-// n = (n + (n >> 16)) & 0x3F;
-// return n;
-// }
-// */
-// assign(n8,
-// binop(Iop_Sub32,
-// mkexpr(n7),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
-// mkU32(0x55555555))));
-// assign(n9,
-// binop(Iop_Add32,
-// binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
-// binop(Iop_And32,
-// binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
-// mkU32(0x33333333))));
-// assign(n10,
-// binop(Iop_And32,
-// binop(Iop_Add32,
-// mkexpr(n9),
-// binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
-// mkU32(0x0F0F0F0F)));
-// assign(n11,
-// binop(Iop_Add32,
-// mkexpr(n10),
-// binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
-// assign(n12,
-// binop(Iop_Add32,
-// mkexpr(n11),
-// binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
-// return
-// binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
-//}
-
/*--------------------------------------------------------------------*/
/*--- end guest_ppc_toIR.c ---*/
/*--------------------------------------------------------------------*/

View File

@@ -0,0 +1,257 @@
commit 97d336b79e36f6c99d8b07f49ebc9b780e6df84e
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:07:37 2018 +0100
Add ppc host-side isel and instruction support for IROps added in previous commit.
VEX/priv/host_ppc_defs.c, VEX/priv/host_ppc_defs.h:
Don't emit cnttz{w,d}. They may be needed on a target which doesn't support
them. Instead we can generate a fairly reasonable alternative sequence with
cntlz{w,d}.
Add support for emitting popcnt{w,d}.
VEX/priv/host_ppc_isel.c
Add support for: Iop_ClzNat32 Iop_ClzNat64
Redo support for: Iop_Ctz{32,64} and their Nat equivalents, so as to not use
cnttz{w,d}, as mentioned above.
Add support for: Iop_PopCount64 Iop_PopCount32 Iop_Reverse8sIn32_x1
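The Ctz lowering relies on the identity ctz(x) = WS - clz(~x & (x - 1)), which also delivers the required boundary case ctz(0) = WS, because cntlz{w,d} return WS for a zero input. A quick plain-C check of the identity (illustrative only; uses GCC builtins):

    #include <stdint.h>
    #include <assert.h>

    /* clz with the ppc boundary behaviour: cntlzw returns 32 for input 0. */
    static unsigned clz32(uint32_t x) { return x ? (unsigned)__builtin_clz(x) : 32; }

    /* ctz via clz, the same computation the isel emits. */
    static unsigned ctz32(uint32_t x) { return 32 - clz32(~x & (x - 1)); }

    int main(void)
    {
        assert(ctz32(0) == 32);  /* ~0 & (0-1) is all ones, clz = 0 */
        assert(ctz32(1) == 0);   /* ~1 & 0 is 0, clz = 32          */
        assert(ctz32(8) == 3);
        return 0;
    }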
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index b073c1d..f4b52e4 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -501,9 +501,9 @@ const HChar* showPPCUnaryOp ( PPCUnaryOp op ) {
case Pun_NEG: return "neg";
case Pun_CLZ32: return "cntlzw";
case Pun_CLZ64: return "cntlzd";
- case Pun_CTZ32: return "cnttzw";
- case Pun_CTZ64: return "cnttzd";
case Pun_EXTSW: return "extsw";
+ case Pun_POP32: return "popcntw";
+ case Pun_POP64: return "popcntd";
default: vpanic("showPPCUnaryOp");
}
}
@@ -4265,20 +4265,19 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
vassert(mode64);
p = mkFormX(p, 31, r_src, r_dst, 0, 58, 0, endness_host);
break;
- case Pun_CTZ32: // cnttzw r_dst, r_src
- /* Note oder of src and dst is backwards from normal */
- p = mkFormX(p, 31, r_src, r_dst, 0, 538, 0, endness_host);
- break;
- case Pun_CTZ64: // cnttzd r_dst, r_src
- /* Note oder of src and dst is backwards from normal */
- vassert(mode64);
- p = mkFormX(p, 31, r_src, r_dst, 0, 570, 0, endness_host);
- break;
case Pun_EXTSW: // extsw r_dst, r_src
vassert(mode64);
p = mkFormX(p, 31, r_src, r_dst, 0, 986, 0, endness_host);
break;
- default: goto bad;
+ case Pun_POP32: // popcntw r_dst, r_src
+ p = mkFormX(p, 31, r_src, r_dst, 0, 378, 0, endness_host);
+ break;
+ case Pun_POP64: // popcntd r_dst, r_src
+ vassert(mode64);
+ p = mkFormX(p, 31, r_src, r_dst, 0, 506, 0, endness_host);
+ break;
+ default:
+ goto bad;
}
goto done;
}
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 17baff5..321fba9 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -291,9 +291,9 @@ typedef
Pun_NOT,
Pun_CLZ32,
Pun_CLZ64,
- Pun_CTZ32,
- Pun_CTZ64,
- Pun_EXTSW
+ Pun_EXTSW,
+ Pun_POP32, // popcntw
+ Pun_POP64 // popcntd
}
PPCUnaryOp;
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 6bdb5f7..5242176 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2065,12 +2065,15 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return r_dst;
}
break;
- case Iop_Clz32:
- case Iop_Clz64: {
+
+ case Iop_Clz32: case Iop_ClzNat32:
+ case Iop_Clz64: case Iop_ClzNat64: {
+ // cntlz is available even in the most basic (earliest) ppc
+ // variants, so it's safe to generate it unconditionally.
HReg r_src, r_dst;
- PPCUnaryOp op_clz = (op_unop == Iop_Clz32) ? Pun_CLZ32 :
- Pun_CLZ64;
- if (op_unop == Iop_Clz64 && !mode64)
+ PPCUnaryOp op_clz = (op_unop == Iop_Clz32 || op_unop == Iop_ClzNat32)
+ ? Pun_CLZ32 : Pun_CLZ64;
+ if ((op_unop == Iop_Clz64 || op_unop == Iop_ClzNat64) && !mode64)
goto irreducible;
/* Count leading zeroes. */
r_dst = newVRegI(env);
@@ -2079,18 +2082,133 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return r_dst;
}
- case Iop_Ctz32:
- case Iop_Ctz64: {
- HReg r_src, r_dst;
- PPCUnaryOp op_clz = (op_unop == Iop_Ctz32) ? Pun_CTZ32 :
- Pun_CTZ64;
- if (op_unop == Iop_Ctz64 && !mode64)
- goto irreducible;
- /* Count trailing zeroes. */
- r_dst = newVRegI(env);
- r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
- addInstr(env, PPCInstr_Unary(op_clz,r_dst,r_src));
- return r_dst;
+ //case Iop_Ctz32:
+ case Iop_CtzNat32:
+ //case Iop_Ctz64:
+ case Iop_CtzNat64:
+ {
+ // Generate code using Clz, because we can't assume the host has
+ // Ctz. In particular, part of the fix for bug 386945 involves
+ // creating a Ctz in ir_opt.c from smaller fragments.
+ PPCUnaryOp op_clz = Pun_CLZ64;
+ Int WS = 64;
+ if (op_unop == Iop_Ctz32 || op_unop == Iop_CtzNat32) {
+ op_clz = Pun_CLZ32;
+ WS = 32;
+ }
+ /* Compute ctz(arg) = wordsize - clz(~arg & (arg - 1)), thusly:
+ t1 = arg - 1
+ t2 = not arg
+ t2 = t2 & t1
+ t2 = clz t2
+ t1 = WS
+ t2 = t1 - t2
+ // result in t2
+ */
+ HReg arg = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg t1 = newVRegI(env);
+ HReg t2 = newVRegI(env);
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t1, arg, PPCRH_Imm(True, 1)));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, t2, arg));
+ addInstr(env, PPCInstr_Alu(Palu_AND, t2, t2, PPCRH_Reg(t1)));
+ addInstr(env, PPCInstr_Unary(op_clz, t2, t2));
+ addInstr(env, PPCInstr_LI(t1, WS, False/*!64-bit imm*/));
+ addInstr(env, PPCInstr_Alu(Palu_SUB, t2, t1, PPCRH_Reg(t2)));
+ return t2;
+ }
+
+ case Iop_PopCount64: {
+ // popcnt{w,d} is only available in later arch revs (ISA 3.0,
+ // maybe) so it's not really correct to emit it here without a caps
+ // check for the host.
+ if (mode64) {
+ HReg r_dst = newVRegI(env);
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ addInstr(env, PPCInstr_Unary(Pun_POP64, r_dst, r_src));
+ return r_dst;
+ }
+ // We don't expect to be required to handle this in 32-bit mode.
+ break;
+ }
+
+ case Iop_PopCount32: {
+ // Similar comment as for Ctz just above applies -- we really
+ // should have a caps check here.
+
+ HReg r_dst = newVRegI(env);
+ // This actually generates popcntw, which in 64 bit mode does a
+ // 32-bit count individually for both low and high halves of the
+ // word. Per the comment at the top of iselIntExpr_R, in the 64
+ // bit mode case, the user of this result is required to ignore
+ // the upper 32 bits of the result. In 32 bit mode this is all
+ // moot. It is, however, unclear from the PowerISA 3.0 docs whether
+ // the instruction exists in 32 bit mode; our own front
+ // end (guest_ppc_toIR.c) accepts it, so I guess it does exist.
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ addInstr(env, PPCInstr_Unary(Pun_POP32, r_dst, r_src));
+ return r_dst;
+ }
+
+ case Iop_Reverse8sIn32_x1: {
+ // A bit of a mouthful, but simply .. 32-bit byte swap.
+ // This is pretty rubbish code. We could do vastly better if
+ // rotates, and better, rotate-inserts, were allowed. Note that
+ // even on a 64 bit target, the right shifts must be done as 32-bit
+ // so as to introduce zero bits in the right places. So it seems
+ // simplest to do the whole sequence in 32-bit insns.
+ /*
+ r = <argument> // working temporary, initial byte order ABCD
+ Mask = 00FF00FF
+ nMask = not Mask
+ tHi = and r, Mask
+ tHi = shl tHi, 8
+ tLo = and r, nMask
+ tLo = shr tLo, 8
+ r = or tHi, tLo // now r has order BADC
+ and repeat for 16 bit chunks ..
+ Mask = 0000FFFF
+ nMask = not Mask
+ tHi = and r, Mask
+ tHi = shl tHi, 16
+ tLo = and r, nMask
+ tLo = shr tLo, 16
+ r = or tHi, tLo // now r has order DCBA
+ */
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg rr = newVRegI(env);
+ HReg rMask = newVRegI(env);
+ HReg rnMask = newVRegI(env);
+ HReg rtHi = newVRegI(env);
+ HReg rtLo = newVRegI(env);
+ // Copy r_src since we need to modify it
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
+ // Swap within 16-bit lanes
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FFULL,
+ False/* !64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+ // And now swap the two 16-bit chunks
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFFULL,
+ False/* !64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, True/*32 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+ return rr;
}
case Iop_Left8:

View File

@@ -0,0 +1,130 @@
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 7 10:42:22 2018 -0500
Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
This makes it possible for memcheck to analyse the new gcc strcmp
inlined code correctly even if the ldbrx load is partly beyond an
addressable block.
Partially resolves bug 386945.
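Iop_Reverse8sIn64_x1 is a full 64-bit byte reversal, so ldbrx becomes one wide load that memcheck can track, followed by the swap. The lowering added below performs three mask/shift/or rounds; in plain C the same computation is (a sketch, equivalent to GCC's __builtin_bswap64):

    #include <stdint.h>

    /* Byte-reverse a 64-bit value: swap bytes within halfwords, then
       halfwords within words, then the two word halves. */
    static uint64_t reverse8s_in64(uint64_t r)
    {
        r = ((r & 0x00FF00FF00FF00FFULL) << 8)  | ((r >> 8)  & 0x00FF00FF00FF00FFULL);
        r = ((r & 0x0000FFFF0000FFFFULL) << 16) | ((r >> 16) & 0x0000FFFF0000FFFFULL);
        r = (r << 32) | (r >> 32);
        return r;
    }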
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8977d4f..a81dace 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
{
- // JRS FIXME:
- // * is the host_endness conditional below actually necessary?
- // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
- // That would be a lot more efficient.
- IRExpr * nextAddr;
- IRTemp w3 = newTemp( Ity_I32 );
- IRTemp w4 = newTemp( Ity_I32 );
- DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
- assign( w1, load( Ity_I32, mkexpr( EA ) ) );
- assign( w2, gen_byterev32( w1 ) );
- nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
- assign( w3, load( Ity_I32, nextAddr ) );
- assign( w4, gen_byterev32( w3 ) );
- if (host_endness == VexEndnessLE)
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+ /* Caller makes sure we are only called in mode64. */
+
+ /* If we supported swapping LE/BE loads in the backend then we could
+ just load the value with the bytes reversed by doing a BE load
+ on an LE machine and a LE load on a BE machine.
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ if (host_endness == VexEndnessBE)
+ assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
else
- putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+ assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
+ putIReg( rD_addr, mkexpr(dw1) );
+
+ But since we currently don't, we load the value as is and then
+ switch it around with Iop_Reverse8sIn64_x1. */
+
+ IRTemp dw1 = newTemp(Ity_I64);
+ IRTemp dw2 = newTemp(Ity_I64);
+ DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+ assign( dw1, load(Ity_I64, mkexpr(EA)) );
+ assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
+ putIReg( rD_addr, mkexpr(dw2) );
break;
}
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 750cf8d..4fc3eb5 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
return rr;
}
+ case Iop_Reverse8sIn64_x1: {
+ /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
+ Can only be used in 64bit mode. */
+ vassert (mode64);
+
+ HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+ HReg rr = newVRegI(env);
+ HReg rMask = newVRegI(env);
+ HReg rnMask = newVRegI(env);
+ HReg rtHi = newVRegI(env);
+ HReg rtLo = newVRegI(env);
+
+ // Copy r_src since we need to modify it
+ addInstr(env, mk_iMOVds_RR(rr, r_src));
+
+ // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
+ addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
+ True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 8)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+ // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
+ addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
True/* 64bit imm*/));
+ addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtHi, rtHi,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 16)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+ // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
+ /* We don't need to mask anymore, just two more shifts and an or. */
+ addInstr(env, mk_iMOVds_RR(rtLo, rr));
+ addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+ rtLo, rtLo,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+ rr, rr,
+ PPCRH_Imm(False/*!signed imm*/, 32)));
+ addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
+
+ return rr;
+ }
+
case Iop_Left8:
case Iop_Left16:
case Iop_Left32:

View File

@@ -0,0 +1,88 @@
commit 5c00e04a1b61475a7f731f8cfede114201815e0a
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 23:25:05 2018 +0100
Implement ppc64 lxvb16x as 128-bit vector load with reversed double words.
This makes it possible for memcheck to know which part of the 128bit
vector is defined, even if the load is partly beyond an addressable block.
Partially resolves bug 386945.
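In other words, lxvb16x must place the byte at EA+0 in the most significant byte of the vector whatever the host endianness. A plain-C model of the translation below (illustrative only; __BYTE_ORDER__ is a GCC/Clang predefine):

    #include <stdint.h>
    #include <string.h>

    /* Model: 128-bit host load, bytes reversed per doubleword, and on an
       LE host the doublewords change places as part of the same fixup. */
    static void lxvb16x_model(const uint8_t *ea, uint64_t *ms_dw, uint64_t *ls_dw)
    {
        uint64_t a, b;
        memcpy(&a, ea,     8);
        memcpy(&b, ea + 8, 8);
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        a = __builtin_bswap64(a);   /* undo the LE byte order */
        b = __builtin_bswap64(b);
    #endif
        *ms_dw = a;   /* bytes EA[0..7],  EA[0] most significant  */
        *ls_dw = b;   /* bytes EA[8..15], EA[8] most significant  */
    }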
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 7af4973..ec2f90a 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -20702,54 +20702,29 @@ dis_vx_load ( UInt theInstr )
{
DIP("lxvb16x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
- IRTemp byte[16];
- int i;
- UInt ea_off = 0;
- IRExpr* irx_addr;
- IRTemp tmp_low[9];
- IRTemp tmp_hi[9];
+ /* The result of lxvb16x should be the same on big and little
+ endian systems. We do a host load, then reverse the bytes in
+ the double words. If the host load was little endian we swap
+ them around again. */
- tmp_low[0] = newTemp( Ity_I64 );
- tmp_hi[0] = newTemp( Ity_I64 );
- assign( tmp_low[0], mkU64( 0 ) );
- assign( tmp_hi[0], mkU64( 0 ) );
-
- for ( i = 0; i < 8; i++ ) {
- byte[i] = newTemp( Ity_I64 );
- tmp_low[i+1] = newTemp( Ity_I64 );
-
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
- ea_off += 1;
-
- assign( byte[i], binop( Iop_Shl64,
- unop( Iop_8Uto64,
- load( Ity_I8, irx_addr ) ),
- mkU8( 8 * ( 7 - i ) ) ) );
+ IRTemp high = newTemp(Ity_I64);
+ IRTemp high_rev = newTemp(Ity_I64);
+ IRTemp low = newTemp(Ity_I64);
+ IRTemp low_rev = newTemp(Ity_I64);
- assign( tmp_low[i+1],
- binop( Iop_Or64,
- mkexpr( byte[i] ), mkexpr( tmp_low[i] ) ) );
- }
+ IRExpr *t128 = load( Ity_V128, mkexpr( EA ) );
- for ( i = 0; i < 8; i++ ) {
- byte[i + 8] = newTemp( Ity_I64 );
- tmp_hi[i+1] = newTemp( Ity_I64 );
+ assign( high, unop(Iop_V128HIto64, t128) );
+ assign( high_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(high)) );
+ assign( low, unop(Iop_V128to64, t128) );
+ assign( low_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(low)) );
- irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
- ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
- ea_off += 1;
+ if (host_endness == VexEndnessLE)
+ t128 = binop( Iop_64HLtoV128, mkexpr (low_rev), mkexpr (high_rev) );
+ else
+ t128 = binop( Iop_64HLtoV128, mkexpr (high_rev), mkexpr (low_rev) );
- assign( byte[i+8], binop( Iop_Shl64,
- unop( Iop_8Uto64,
- load( Ity_I8, irx_addr ) ),
- mkU8( 8 * ( 7 - i ) ) ) );
- assign( tmp_hi[i+1], binop( Iop_Or64,
- mkexpr( byte[i+8] ),
- mkexpr( tmp_hi[i] ) ) );
- }
- putVSReg( XT, binop( Iop_64HLtoV128,
- mkexpr( tmp_low[8] ), mkexpr( tmp_hi[8] ) ) );
+ putVSReg( XT, t128 );
break;
}

View File

@@ -0,0 +1,47 @@
commit b7d65cab4f3e9a6f66a496e723e53ed736c4d2e7
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 00:55:42 2018 +0100
Implement ppc64 lxvd2x as 128-bit load with double word swap for ppc64le.
This makes it possible for memcheck to know which part of the 128bit
vector is defined, even if the load is partly beyond an addressable block.
Partially resolves bug 386945.
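On an LE host the single 128-bit load delivers the doubleword at EA in the low half of the vector, whereas the architecture wants it in the high half, hence the swap. A plain-C sketch of the LE case (illustrative names, not Valgrind code):

    #include <stdint.h>
    #include <string.h>

    /* LE-host model of the new lxvd2x translation. */
    static void lxvd2x_model_le(const uint8_t *ea, uint64_t *vsr_hi, uint64_t *vsr_lo)
    {
        uint64_t load_lo, load_hi;
        memcpy(&load_lo, ea,     8);   /* low  half of the LE V128 load */
        memcpy(&load_hi, ea + 8, 8);   /* high half of the LE V128 load */
        *vsr_hi = load_lo;             /* swap: doubleword at EA on top */
        *vsr_lo = load_hi;
    }

The point is that memcheck now sees a single 16-byte load rather than two 8-byte loads, so definedness tracking works even when the access straddles the end of an addressable block.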
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index a81dace..7af4973 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -20590,16 +20590,22 @@ dis_vx_load ( UInt theInstr )
}
case 0x34C: // lxvd2x
{
- IROp addOp = ty == Ity_I64 ? Iop_Add64 : Iop_Add32;
- IRExpr * high, *low;
- ULong ea_off = 8;
- IRExpr* high_addr;
+ IRExpr *t128;
DIP("lxvd2x %d,r%u,r%u\n", XT, rA_addr, rB_addr);
- high = load( Ity_I64, mkexpr( EA ) );
- high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
- : mkU32( ea_off ) );
- low = load( Ity_I64, high_addr );
- putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
+ t128 = load( Ity_V128, mkexpr( EA ) );
+
+ /* The data in the vec register should be in big endian order.
+ So if we just did a little endian load then swap around the
+ high and low double words. */
+ if (host_endness == VexEndnessLE) {
+ IRTemp high = newTemp(Ity_I64);
+ IRTemp low = newTemp(Ity_I64);
+ assign( high, unop(Iop_V128HIto64, t128) );
+ assign( low, unop(Iop_V128to64, t128) );
+ t128 = binop( Iop_64HLtoV128, mkexpr (low), mkexpr (high) );
+ }
+
+ putVSReg( XT, t128 );
break;
}
case 0x14C: // lxvdsx

View File

@@ -0,0 +1,111 @@
commit 3967a99c26e8b314634a6b1fd8927cbb2bb5d060
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 14:11:29 2018 +0100
Implement minimal ptrace support for ppc64[le]-linux.
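For GETREGSET/SETREGSET the tracer passes a register-set id (e.g. NT_PRSTATUS) in ptrace's addr slot and a struct iovec in the data slot; that is why the wrappers below check ARG4 but never dereference ARG3. A user-side sketch of the call shape (illustrative; the register buffer type varies by architecture):

    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>
    #include <elf.h>        /* NT_PRSTATUS */

    /* Fetch the tracee's general-purpose register set. */
    static long fetch_gprs(pid_t pid, void *regs_buf, size_t regs_size)
    {
        struct iovec iov = { .iov_base = regs_buf, .iov_len = regs_size };
        return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov);
    }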
diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
index 6549dd1..0fdcc8e 100644
--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
@@ -388,6 +388,7 @@ DECL_TEMPLATE(ppc64_linux, sys_mmap);
//zz DECL_TEMPLATE(ppc64_linux, sys_sigreturn);
DECL_TEMPLATE(ppc64_linux, sys_rt_sigreturn);
DECL_TEMPLATE(ppc64_linux, sys_fadvise64);
+DECL_TEMPLATE(ppc64_linux, sys_ptrace);
PRE(sys_mmap)
{
@@ -511,6 +512,72 @@ PRE(sys_rt_sigreturn)
*flags |= SfPollAfter;
}
+// ARG3 is only used for pointers into the traced process's address
+// space and for offsets into the traced process's struct
+// user_regs_struct. It is never a pointer into this process's memory
+// space, and we should therefore not check anything it points to.
+// powerpc does have other ways to get/set registers; we only support
+// GET/SETREGSET for now.
+PRE(sys_ptrace)
+{
+ PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
+ PRE_REG_READ4(int, "ptrace",
+ long, request, long, pid, long, addr, long, data);
+ switch (ARG1) {
+ case VKI_PTRACE_PEEKTEXT:
+ case VKI_PTRACE_PEEKDATA:
+ case VKI_PTRACE_PEEKUSR:
+ PRE_MEM_WRITE( "ptrace(peek)", ARG4,
+ sizeof (long));
+ break;
+ case VKI_PTRACE_GETEVENTMSG:
+ PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
+ break;
+ case VKI_PTRACE_GETSIGINFO:
+ PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_SETSIGINFO:
+ PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_GETREGSET:
+ ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
+ break;
+ case VKI_PTRACE_SETREGSET:
+ ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
+ break;
+ default:
+ break;
+ }
+}
+
+POST(sys_ptrace)
+{
+ switch (ARG1) {
+ case VKI_PTRACE_TRACEME:
+ ML_(linux_POST_traceme)(tid);
+ break;
+ case VKI_PTRACE_PEEKTEXT:
+ case VKI_PTRACE_PEEKDATA:
+ case VKI_PTRACE_PEEKUSR:
+ POST_MEM_WRITE( ARG4, sizeof (long));
+ break;
+ case VKI_PTRACE_GETEVENTMSG:
+ POST_MEM_WRITE( ARG4, sizeof(unsigned long));
+ break;
+ case VKI_PTRACE_GETSIGINFO:
+ /* XXX: This is a simplification. Different parts of the
+ * siginfo_t are valid depending on the type of signal.
+ */
+ POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
+ break;
+ case VKI_PTRACE_GETREGSET:
+ ML_(linux_POST_getregset)(tid, ARG3, ARG4);
+ break;
+ default:
+ break;
+ }
+}
+
#undef PRE
#undef POST
@@ -562,8 +629,7 @@ static SyscallTableEntry syscall_table[] = {
GENX_(__NR_getuid, sys_getuid), // 24
// _____(__NR_stime, sys_stime), // 25
-// When ptrace is supported, memcheck/tests/linux/getregset should be enabled
-// _____(__NR_ptrace, sys_ptrace), // 26
+ PLAXY(__NR_ptrace, sys_ptrace), // 26
GENX_(__NR_alarm, sys_alarm), // 27
// _____(__NR_oldfstat, sys_oldfstat), // 28
GENX_(__NR_pause, sys_pause), // 29
diff --git a/memcheck/tests/linux/getregset.vgtest b/memcheck/tests/linux/getregset.vgtest
index 4c66108..c35be4c 100644
--- a/memcheck/tests/linux/getregset.vgtest
+++ b/memcheck/tests/linux/getregset.vgtest
@@ -1,4 +1,4 @@
prog: getregset
vgopts: -q
-prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 ) && ! ../../../tests/arch_test ppc64
+prereq: ((../../../tests/os_test linux 2.6.33 && ! ../../../tests/arch_test mips32) || ../../../tests/os_test linux 3.10.0 )

View File

@@ -0,0 +1,28 @@
commit 321771ee63740333ad355244e0764295218843b8
Author: Mark Wielaard <mark@klomp.org>
Date: Sun Dec 9 14:26:39 2018 +0100
memcheck: Allow unaligned loads of 128bit vectors on ppc64[le].
On powerpc partial unaligned loads of vectors from partially invalid
addresses are OK and could be generated by our translation of lxvd2x.
Adjust partial_load memcheck tests to allow partial loads of 16 byte
vectors on powerpc64.
Part of resolving bug #386945.
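Condensed, the slow-path policy in mc_LOADV_128_or_256_slow after this change reads roughly as follows (a sketch of the #ifdef ladder, not the verbatim source):

    /* May a wide vector load touching unaddressable bytes still be
       serviced byte-by-byte rather than treated as a fatal access? */
    static int aligned_ok(unsigned long a, unsigned szB)
    {
    #if defined(VGA_s390x)
        return (a & 0xfff) <= 0x1000 - szB;   /* all bytes in one page   */
    #elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
        return szB == 16;                     /* lxvd2x may be unaligned */
    #else
        return (a & (szB - 1)) == 0;          /* natural alignment       */
    #endif
    }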
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 737f79d..101916b 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1354,6 +1354,9 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
/* OK if all loaded bytes are from the same page. */
Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
+ /* lxvd2x might generate an unaligned 128 bit vector load. */
+ Bool alignedOK = (szB == 16);
# else
/* OK if the address is aligned by the load size. */
Bool alignedOK = (0 == (a & (szB - 1)));

View File

@@ -0,0 +1,148 @@
commit c5a5bea00af75f6ac50da10967d956f117b956f1
Author: Mark Wielaard <mark@klomp.org>
Date: Sat Dec 8 13:47:43 2018 -0500
memcheck: Allow unaligned loads of words on ppc64[le].
On powerpc partial unaligned loads of words from partially invalid
addresses are OK and could be generated by our translation of ldbrx.
Adjust partial_load memcheck tests to allow partial loads of words
on powerpc64.
Part of resolving bug #386945.
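The corresponding word-sized rule in mc_LOADVn_slow drops the alignment requirement on ppc64, keeping only the demand that at least one byte be addressable. Roughly (again a sketch, with sizeof(long) standing in for VG_WORDSIZE):

    /* Is a partially-invalid word load exempt from an addressing error? */
    static int word_load_exemptible(unsigned long a, unsigned szB,
                                    unsigned n_addrs_bad)
    {
    #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
        /* On power, unaligned partial word loads are OK. */
        return szB == sizeof(long) && n_addrs_bad < sizeof(long);
    #else
        return szB == sizeof(long) && (a & (sizeof(long) - 1)) == 0
               && n_addrs_bad < sizeof(long);
    #endif
    }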
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 3ef7cb9..737f79d 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1508,6 +1508,9 @@ ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
# if defined(VGA_mips64) && defined(VGABI_N32)
if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
&& n_addrs_bad < VG_WORDSIZE * 2)
+# elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
+ /* On power unaligned loads of words are OK. */
+ if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
# else
if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
&& n_addrs_bad < VG_WORDSIZE)
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 2af4dd1..70b8ada 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -235,8 +235,10 @@ EXTRA_DIST = \
partiallydefinedeq.stdout.exp \
partial_load_ok.vgtest partial_load_ok.stderr.exp \
partial_load_ok.stderr.exp64 \
+ partial_load_ok.stderr.exp-ppc64 \
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
partial_load_dflt.stderr.exp64 \
+ partial_load_dflt.stderr.exp-ppc64 \
partial_load_dflt.stderr.expr-s390x-mvc \
pdb-realloc.stderr.exp pdb-realloc.vgtest \
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \
diff --git a/memcheck/tests/partial_load.c b/memcheck/tests/partial_load.c
index 0b2f10b..685ca8d 100644
--- a/memcheck/tests/partial_load.c
+++ b/memcheck/tests/partial_load.c
@@ -1,14 +1,14 @@
-
+#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
int main ( void )
{
- long w;
- int i;
- char* p;
-
+ long w; int i; char* p;
assert(sizeof(long) == sizeof(void*));
+#if defined(__powerpc64__)
+ fprintf (stderr, "powerpc64\n"); /* Used to select correct .exp file. */
+#endif
/* partial load, which --partial-loads-ok=yes should suppress */
p = calloc( sizeof(long)-1, 1 );
@@ -16,7 +16,7 @@ int main ( void )
w = *(long*)p;
free(p);
- /* partial but misaligned, cannot be suppressed */
+ /* partial but misaligned; ok on ppc64[le], but otherwise cannot be suppressed */
p = calloc( sizeof(long), 1 );
assert(p);
p++;
diff --git a/memcheck/tests/partial_load_dflt.stderr.exp-ppc64 b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
new file mode 100644
index 0000000..cf32bcf
--- /dev/null
+++ b/memcheck/tests/partial_load_dflt.stderr.exp-ppc64
@@ -0,0 +1,23 @@
+
+powerpc64
+Invalid read of size 2
+ at 0x........: main (partial_load.c:30)
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
+ at 0x........: calloc (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:28)
+
+Invalid read of size 8
+ at 0x........: main (partial_load.c:37)
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
+ at 0x........: free (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:36)
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff --git a/memcheck/tests/partial_load_ok.stderr.exp-ppc64 b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
new file mode 100644
index 0000000..cf32bcf
--- /dev/null
+++ b/memcheck/tests/partial_load_ok.stderr.exp-ppc64
@@ -0,0 +1,23 @@
+
+powerpc64
+Invalid read of size 2
+ at 0x........: main (partial_load.c:30)
+ Address 0x........ is 0 bytes inside a block of size 1 alloc'd
+ at 0x........: calloc (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:28)
+
+Invalid read of size 8
+ at 0x........: main (partial_load.c:37)
+ Address 0x........ is 0 bytes inside a block of size 8 free'd
+ at 0x........: free (vg_replace_malloc.c:...)
+ by 0x........: main (partial_load.c:36)
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-12 23:17:07.525501080 +0100
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-12 23:18:13.404014757 +0100
@@ -1546,8 +1546,10 @@
partiallydefinedeq.stdout.exp \
partial_load_ok.vgtest partial_load_ok.stderr.exp \
partial_load_ok.stderr.exp64 \
+ partial_load_ok.stderr.exp-ppc64 \
partial_load_dflt.vgtest partial_load_dflt.stderr.exp \
partial_load_dflt.stderr.exp64 \
+ partial_load_dflt.stderr.exp-ppc64 \
partial_load_dflt.stderr.expr-s390x-mvc \
pdb-realloc.stderr.exp pdb-realloc.vgtest \
pdb-realloc2.stderr.exp pdb-realloc2.stdout.exp pdb-realloc2.vgtest \

View File

@@ -0,0 +1,84 @@
commit 71002d8a5111d02ce8049c55017a8d948c820e35
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu Oct 25 13:47:12 2018 +0200
Bug 400490 s390x: Fix register allocation for VRs vs FPRs
On s390x, if vector registers are available, they are fed to the register
allocator as if they were separate from the floating-point registers. But
in fact the FPRs are embedded in the VRs. So for instance, if both f3 and
v3 are allocated and used at the same time, corruption will result.
This is fixed by offering only the non-overlapping VRs, v16 to v31, to the
register allocator instead.
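The overlap can be pictured as a union: each FPR fN occupies the leftmost eight bytes of the same-numbered VR vN, so only v16 to v31 have no FPR alias. An illustrative model (not Valgrind's actual representation):

    #include <stdint.h>

    /* s390x: fN aliases bytes 0..7 of vN, so handing the allocator both
       f3 and v3 as independent registers corrupts whichever is written
       second.  v16..v31 carry no FPR alias and are safe to hand out. */
    typedef union {
        uint8_t bytes[16];   /* the full vector register vN  */
        double  fpr;         /* fN overlays bytes 0..7 of vN */
    } S390VReg;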
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index 6c22ac8..98ac938 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -59,7 +59,6 @@ static UInt s390_tchain_load64_len(void);
/* A mapping from register number to register index */
static Int gpr_index[16]; // GPR regno -> register index
-static Int fpr_index[16]; // FPR regno -> register index
static Int vr_index[32]; // VR regno -> register index
HReg
@@ -73,7 +72,7 @@ s390_hreg_gpr(UInt regno)
HReg
s390_hreg_fpr(UInt regno)
{
- Int ix = fpr_index[regno];
+ Int ix = vr_index[regno];
vassert(ix >= 0);
return mkHReg(/*virtual*/False, HRcFlt64, regno, ix);
}
@@ -463,11 +462,9 @@ getRRegUniverse_S390(void)
RRegUniverse__init(ru);
- /* Assign invalid values to the gpr/fpr/vr_index */
+ /* Assign invalid values to the gpr/vr_index */
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
gpr_index[i] = -1;
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
- fpr_index[i] = -1;
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
vr_index[i] = -1;
@@ -494,17 +491,17 @@ getRRegUniverse_S390(void)
ru->allocable_start[HRcFlt64] = ru->size;
for (UInt regno = 8; regno <= 15; ++regno) {
- fpr_index[regno] = ru->size;
+ vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
for (UInt regno = 0; regno <= 7; ++regno) {
- fpr_index[regno] = ru->size;
+ vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable_start[HRcVec128] = ru->size;
- for (UInt regno = 0; regno <= 31; ++regno) {
+ for (UInt regno = 16; regno <= 31; ++regno) {
vr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_vr(regno);
}
@@ -527,12 +524,12 @@ getRRegUniverse_S390(void)
/* Sanity checking */
for (UInt i = 0; i < sizeof gpr_index / sizeof gpr_index[0]; ++i)
vassert(gpr_index[i] >= 0);
- for (UInt i = 0; i < sizeof fpr_index / sizeof fpr_index[0]; ++i)
- vassert(fpr_index[i] >= 0);
for (UInt i = 0; i < sizeof vr_index / sizeof vr_index[0]; ++i)
vassert(vr_index[i] >= 0);
initialised = True;
+
+ RRegUniverse__check_is_sane(ru);
return ru;
}

View File

@@ -0,0 +1,41 @@
commit 9545e9f96beda6e9f2205bdb3c3e96edaf8d9e2b
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue Oct 30 17:06:38 2018 +0100
Bug 400491 s390x: Sign-extend immediate operand of LOCHI and friends
The VEX implementation of each of the z/Architecture instructions LOCHI,
LOCHHI, and LOCGHI treats the immediate 16-bit operand as an unsigned
integer instead of a signed integer. This is fixed.
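The cast chain (UInt)(Int)(Short)i2 reinterprets the 16-bit immediate as signed and then sign-extends it to 32 bits; for the 64-bit LOCGHI the analogous chain must extend through Long/ULong. A minimal check of the difference (illustrative only):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t i2 = 0xFFFF;                                  /* immediate -1 */
        assert((uint32_t)i2 == 0x0000FFFFu);                   /* old, wrong   */
        assert((uint32_t)(int32_t)(int16_t)i2 == 0xFFFFFFFFu); /* fixed        */
        assert((uint64_t)(int64_t)(int16_t)i2
               == 0xFFFFFFFFFFFFFFFFull);                      /* LOCGHI case  */
        return 0;
    }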
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index 60b6081..9c4d79b 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -16307,7 +16307,7 @@ static const HChar *
s390_irgen_LOCHHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_w0(r1, mkU32(i2));
+ put_gpr_w0(r1, mkU32((UInt)(Int)(Short)i2));
return "lochhi";
}
@@ -16316,7 +16316,7 @@ static const HChar *
s390_irgen_LOCHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_w1(r1, mkU32(i2));
+ put_gpr_w1(r1, mkU32((UInt)(Int)(Short)i2));
return "lochi";
}
@@ -16325,7 +16325,7 @@ static const HChar *
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_dw0(r1, mkU64(i2));
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
return "locghi";
}

View File

@@ -0,0 +1,32 @@
commit 467c7c4c9665c0f8b41a4416722a027ebc05df2b
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Mon Jan 21 14:10:00 2019 +0100
Bug 403552 s390x: Fix vector facility bit number
The wrong bit number was used when checking for the vector facility. This
can result in a fatal emulation error: "Encountered an instruction that
requires the vector facility. That facility is not available on this
host."
In many cases the wrong facility bit was set as well, hence
nothing bad happened. But when running Valgrind within a Qemu/KVM guest,
the wrong bit was not (always?) set and the emulation error occurred.
This fix simply corrects the vector facility bit number, changing it from
128 to 129.
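STFLE facility bits are numbered from the most significant bit of the first result doubleword onward, so bit 129 sits near the top of the third doubleword. A hedged sketch of the test (the helper name is illustrative):

    #include <stdint.h>

    /* Test STFLE facility bit n: doubleword n/64, counting bits from the
       MSB, so bit 129 is bit position 1 of stfle_words[2]. */
    static int facility_present(const uint64_t *stfle_words, unsigned n)
    {
        return (int)((stfle_words[n / 64] >> (63 - (n % 64))) & 1);
    }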
diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h
index a8a66b96b..8723ee21d 100644
--- a/VEX/pub/libvex_s390x_common.h
+++ b/VEX/pub/libvex_s390x_common.h
@@ -103,7 +103,7 @@
#define S390_FAC_MSA5 57 // message-security-assist 5
#define S390_FAC_TREXE 73 // transactional execution
#define S390_FAC_MSA4 77 // message-security-assist 4
-#define S390_FAC_VX 128 // vector facility
+#define S390_FAC_VX 129 // vector facility
/*--------------------------------------------------------------*/

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,408 @@
commit 50bd2282bce101012a5668b670cb185375600d2d
Author: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu Oct 18 17:51:57 2018 +0200
Bug 397187 s390x: Add vector register support for vgdb
On s390x machines with a vector facility, Valgrind's gdbserver didn't
represent the vector registers. This is fixed.
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 8de1996..94030fd 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -685,6 +685,11 @@ GDBSERVER_XML_FILES = \
m_gdbserver/s390x-linux64-valgrind-s1.xml \
m_gdbserver/s390x-linux64-valgrind-s2.xml \
m_gdbserver/s390x-linux64.xml \
+ m_gdbserver/s390-vx-valgrind-s1.xml \
+ m_gdbserver/s390-vx-valgrind-s2.xml \
+ m_gdbserver/s390-vx.xml \
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
+ m_gdbserver/s390x-vx-linux.xml \
m_gdbserver/mips-cp0-valgrind-s1.xml \
m_gdbserver/mips-cp0-valgrind-s2.xml \
m_gdbserver/mips-cp0.xml \
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
new file mode 100644
index 0000000..ca461b3
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s1.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx-valgrind-s1">
+ <reg name="v0ls1" bitsize="64" type="uint64"/>
+ <reg name="v1ls1" bitsize="64" type="uint64"/>
+ <reg name="v2ls1" bitsize="64" type="uint64"/>
+ <reg name="v3ls1" bitsize="64" type="uint64"/>
+ <reg name="v4ls1" bitsize="64" type="uint64"/>
+ <reg name="v5ls1" bitsize="64" type="uint64"/>
+ <reg name="v6ls1" bitsize="64" type="uint64"/>
+ <reg name="v7ls1" bitsize="64" type="uint64"/>
+ <reg name="v8ls1" bitsize="64" type="uint64"/>
+ <reg name="v9ls1" bitsize="64" type="uint64"/>
+ <reg name="v10ls1" bitsize="64" type="uint64"/>
+ <reg name="v11ls1" bitsize="64" type="uint64"/>
+ <reg name="v12ls1" bitsize="64" type="uint64"/>
+ <reg name="v13ls1" bitsize="64" type="uint64"/>
+ <reg name="v14ls1" bitsize="64" type="uint64"/>
+ <reg name="v15ls1" bitsize="64" type="uint64"/>
+
+ <reg name="v16s1" bitsize="128" type="uint128"/>
+ <reg name="v17s1" bitsize="128" type="uint128"/>
+ <reg name="v18s1" bitsize="128" type="uint128"/>
+ <reg name="v19s1" bitsize="128" type="uint128"/>
+ <reg name="v20s1" bitsize="128" type="uint128"/>
+ <reg name="v21s1" bitsize="128" type="uint128"/>
+ <reg name="v22s1" bitsize="128" type="uint128"/>
+ <reg name="v23s1" bitsize="128" type="uint128"/>
+ <reg name="v24s1" bitsize="128" type="uint128"/>
+ <reg name="v25s1" bitsize="128" type="uint128"/>
+ <reg name="v26s1" bitsize="128" type="uint128"/>
+ <reg name="v27s1" bitsize="128" type="uint128"/>
+ <reg name="v28s1" bitsize="128" type="uint128"/>
+ <reg name="v29s1" bitsize="128" type="uint128"/>
+ <reg name="v30s1" bitsize="128" type="uint128"/>
+ <reg name="v31s1" bitsize="128" type="uint128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
new file mode 100644
index 0000000..eccbd8d
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx-valgrind-s2.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx-valgrind-s2">
+ <reg name="v0ls2" bitsize="64" type="uint64"/>
+ <reg name="v1ls2" bitsize="64" type="uint64"/>
+ <reg name="v2ls2" bitsize="64" type="uint64"/>
+ <reg name="v3ls2" bitsize="64" type="uint64"/>
+ <reg name="v4ls2" bitsize="64" type="uint64"/>
+ <reg name="v5ls2" bitsize="64" type="uint64"/>
+ <reg name="v6ls2" bitsize="64" type="uint64"/>
+ <reg name="v7ls2" bitsize="64" type="uint64"/>
+ <reg name="v8ls2" bitsize="64" type="uint64"/>
+ <reg name="v9ls2" bitsize="64" type="uint64"/>
+ <reg name="v10ls2" bitsize="64" type="uint64"/>
+ <reg name="v11ls2" bitsize="64" type="uint64"/>
+ <reg name="v12ls2" bitsize="64" type="uint64"/>
+ <reg name="v13ls2" bitsize="64" type="uint64"/>
+ <reg name="v14ls2" bitsize="64" type="uint64"/>
+ <reg name="v15ls2" bitsize="64" type="uint64"/>
+
+ <reg name="v16s2" bitsize="128" type="uint128"/>
+ <reg name="v17s2" bitsize="128" type="uint128"/>
+ <reg name="v18s2" bitsize="128" type="uint128"/>
+ <reg name="v19s2" bitsize="128" type="uint128"/>
+ <reg name="v20s2" bitsize="128" type="uint128"/>
+ <reg name="v21s2" bitsize="128" type="uint128"/>
+ <reg name="v22s2" bitsize="128" type="uint128"/>
+ <reg name="v23s2" bitsize="128" type="uint128"/>
+ <reg name="v24s2" bitsize="128" type="uint128"/>
+ <reg name="v25s2" bitsize="128" type="uint128"/>
+ <reg name="v26s2" bitsize="128" type="uint128"/>
+ <reg name="v27s2" bitsize="128" type="uint128"/>
+ <reg name="v28s2" bitsize="128" type="uint128"/>
+ <reg name="v29s2" bitsize="128" type="uint128"/>
+ <reg name="v30s2" bitsize="128" type="uint128"/>
+ <reg name="v31s2" bitsize="128" type="uint128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390-vx.xml b/coregrind/m_gdbserver/s390-vx.xml
new file mode 100644
index 0000000..2a16873
--- /dev/null
+++ b/coregrind/m_gdbserver/s390-vx.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.s390.vx">
+ <vector id="v4f" type="ieee_single" count="4"/>
+ <vector id="v2d" type="ieee_double" count="2"/>
+ <vector id="v16i8" type="int8" count="16"/>
+ <vector id="v8i16" type="int16" count="8"/>
+ <vector id="v4i32" type="int32" count="4"/>
+ <vector id="v2i64" type="int64" count="2"/>
+ <union id="vec128">
+ <field name="v4_float" type="v4f"/>
+ <field name="v2_double" type="v2d"/>
+ <field name="v16_int8" type="v16i8"/>
+ <field name="v8_int16" type="v8i16"/>
+ <field name="v4_int32" type="v4i32"/>
+ <field name="v2_int64" type="v2i64"/>
+ <field name="uint128" type="uint128"/>
+ </union>
+
+ <reg name="v0l" bitsize="64" type="uint64"/>
+ <reg name="v1l" bitsize="64" type="uint64"/>
+ <reg name="v2l" bitsize="64" type="uint64"/>
+ <reg name="v3l" bitsize="64" type="uint64"/>
+ <reg name="v4l" bitsize="64" type="uint64"/>
+ <reg name="v5l" bitsize="64" type="uint64"/>
+ <reg name="v6l" bitsize="64" type="uint64"/>
+ <reg name="v7l" bitsize="64" type="uint64"/>
+ <reg name="v8l" bitsize="64" type="uint64"/>
+ <reg name="v9l" bitsize="64" type="uint64"/>
+ <reg name="v10l" bitsize="64" type="uint64"/>
+ <reg name="v11l" bitsize="64" type="uint64"/>
+ <reg name="v12l" bitsize="64" type="uint64"/>
+ <reg name="v13l" bitsize="64" type="uint64"/>
+ <reg name="v14l" bitsize="64" type="uint64"/>
+ <reg name="v15l" bitsize="64" type="uint64"/>
+
+ <reg name="v16" bitsize="128" type="vec128"/>
+ <reg name="v17" bitsize="128" type="vec128"/>
+ <reg name="v18" bitsize="128" type="vec128"/>
+ <reg name="v19" bitsize="128" type="vec128"/>
+ <reg name="v20" bitsize="128" type="vec128"/>
+ <reg name="v21" bitsize="128" type="vec128"/>
+ <reg name="v22" bitsize="128" type="vec128"/>
+ <reg name="v23" bitsize="128" type="vec128"/>
+ <reg name="v24" bitsize="128" type="vec128"/>
+ <reg name="v25" bitsize="128" type="vec128"/>
+ <reg name="v26" bitsize="128" type="vec128"/>
+ <reg name="v27" bitsize="128" type="vec128"/>
+ <reg name="v28" bitsize="128" type="vec128"/>
+ <reg name="v29" bitsize="128" type="vec128"/>
+ <reg name="v30" bitsize="128" type="vec128"/>
+ <reg name="v31" bitsize="128" type="vec128"/>
+</feature>
diff --git a/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
new file mode 100644
index 0000000..0237002
--- /dev/null
+++ b/coregrind/m_gdbserver/s390x-vx-linux-valgrind.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- S/390 64-bit user-level code. -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>s390:64-bit</architecture>
+ <xi:include href="s390x-core64.xml"/>
+ <xi:include href="s390-acr.xml"/>
+ <xi:include href="s390-fpr.xml"/>
+ <xi:include href="s390x-linux64.xml"/>
+ <xi:include href="s390-vx.xml"/>
+ <xi:include href="s390x-core64-valgrind-s1.xml"/>
+ <xi:include href="s390-acr-valgrind-s1.xml"/>
+ <xi:include href="s390-fpr-valgrind-s1.xml"/>
+ <xi:include href="s390x-linux64-valgrind-s1.xml"/>
+ <xi:include href="s390-vx-valgrind-s1.xml"/>
+ <xi:include href="s390x-core64-valgrind-s2.xml"/>
+ <xi:include href="s390-acr-valgrind-s2.xml"/>
+ <xi:include href="s390-fpr-valgrind-s2.xml"/>
+ <xi:include href="s390x-linux64-valgrind-s2.xml"/>
+ <xi:include href="s390-vx-valgrind-s2.xml"/>
+</target>
diff --git a/coregrind/m_gdbserver/s390x-vx-linux.xml b/coregrind/m_gdbserver/s390x-vx-linux.xml
new file mode 100644
index 0000000..e431c5b
--- /dev/null
+++ b/coregrind/m_gdbserver/s390x-vx-linux.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+
+ Copying and distribution of this file, with or without modification,
+ are permitted in any medium without royalty provided the copyright
+ notice and this notice are preserved. -->
+
+<!-- S/390 64-bit user-level code. -->
+
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+ <architecture>s390:64-bit</architecture>
+ <xi:include href="s390x-core64.xml"/>
+ <xi:include href="s390-acr.xml"/>
+ <xi:include href="s390-fpr.xml"/>
+ <xi:include href="s390x-linux64.xml"/>
+ <xi:include href="s390-vx.xml"/>
+</target>
diff --git a/coregrind/m_gdbserver/valgrind-low-s390x.c b/coregrind/m_gdbserver/valgrind-low-s390x.c
index 7bbb2e3..a667f4b 100644
--- a/coregrind/m_gdbserver/valgrind-low-s390x.c
+++ b/coregrind/m_gdbserver/valgrind-low-s390x.c
@@ -88,9 +88,42 @@ static struct reg regs[] = {
{ "f14", 2592, 64 },
{ "f15", 2656, 64 },
{ "orig_r2", 2720, 64 },
+ { "v0l", 2784, 64 },
+ { "v1l", 2848, 64 },
+ { "v2l", 2912, 64 },
+ { "v3l", 2976, 64 },
+ { "v4l", 3040, 64 },
+ { "v5l", 3104, 64 },
+ { "v6l", 3168, 64 },
+ { "v7l", 3232, 64 },
+ { "v8l", 3296, 64 },
+ { "v9l", 3360, 64 },
+ { "v10l", 3424, 64 },
+ { "v11l", 3488, 64 },
+ { "v12l", 3552, 64 },
+ { "v13l", 3616, 64 },
+ { "v14l", 3680, 64 },
+ { "v15l", 3744, 64 },
+ { "v16", 3808, 128 },
+ { "v17", 3936, 128 },
+ { "v18", 4064, 128 },
+ { "v19", 4192, 128 },
+ { "v20", 4320, 128 },
+ { "v21", 4448, 128 },
+ { "v22", 4576, 128 },
+ { "v23", 4704, 128 },
+ { "v24", 4832, 128 },
+ { "v25", 4960, 128 },
+ { "v26", 5088, 128 },
+ { "v27", 5216, 128 },
+ { "v28", 5344, 128 },
+ { "v29", 5472, 128 },
+ { "v30", 5600, 128 },
+ { "v31", 5728, 128 },
};
static const char *expedite_regs[] = { "r14", "r15", "pswa", 0 };
-#define num_regs (sizeof (regs) / sizeof (regs[0]))
+#define num_regs_all (sizeof (regs) / sizeof (regs[0]))
+static int num_regs;
static
CORE_ADDR get_pc (void)
@@ -165,7 +198,7 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
case 32: VG_(transfer) (&s390x->guest_a14, buf, dir, size, mod); break;
case 33: VG_(transfer) (&s390x->guest_a15, buf, dir, size, mod); break;
case 34: VG_(transfer) (&s390x->guest_fpc, buf, dir, size, mod); break;
- case 35: VG_(transfer) (&s390x->guest_v0, buf, dir, size, mod); break;
+ case 35: VG_(transfer) (&s390x->guest_v0.w64[0], buf, dir, size, mod); break;
case 36: VG_(transfer) (&s390x->guest_v1.w64[0], buf, dir, size, mod); break;
case 37: VG_(transfer) (&s390x->guest_v2.w64[0], buf, dir, size, mod); break;
case 38: VG_(transfer) (&s390x->guest_v3.w64[0], buf, dir, size, mod); break;
@@ -182,18 +215,65 @@ void transfer_register (ThreadId tid, int abs_regno, void * buf,
case 49: VG_(transfer) (&s390x->guest_v14.w64[0], buf, dir, size, mod); break;
case 50: VG_(transfer) (&s390x->guest_v15.w64[0], buf, dir, size, mod); break;
case 51: *mod = False; break; //GDBTD??? { "orig_r2", 0, 64 },
+ case 52: VG_(transfer) (&s390x->guest_v0.w64[1], buf, dir, size, mod); break;
+ case 53: VG_(transfer) (&s390x->guest_v1.w64[1], buf, dir, size, mod); break;
+ case 54: VG_(transfer) (&s390x->guest_v2.w64[1], buf, dir, size, mod); break;
+ case 55: VG_(transfer) (&s390x->guest_v3.w64[1], buf, dir, size, mod); break;
+ case 56: VG_(transfer) (&s390x->guest_v4.w64[1], buf, dir, size, mod); break;
+ case 57: VG_(transfer) (&s390x->guest_v5.w64[1], buf, dir, size, mod); break;
+ case 58: VG_(transfer) (&s390x->guest_v6.w64[1], buf, dir, size, mod); break;
+ case 59: VG_(transfer) (&s390x->guest_v7.w64[1], buf, dir, size, mod); break;
+ case 60: VG_(transfer) (&s390x->guest_v8.w64[1], buf, dir, size, mod); break;
+ case 61: VG_(transfer) (&s390x->guest_v9.w64[1], buf, dir, size, mod); break;
+ case 62: VG_(transfer) (&s390x->guest_v10.w64[1], buf, dir, size, mod); break;
+ case 63: VG_(transfer) (&s390x->guest_v11.w64[1], buf, dir, size, mod); break;
+ case 64: VG_(transfer) (&s390x->guest_v12.w64[1], buf, dir, size, mod); break;
+ case 65: VG_(transfer) (&s390x->guest_v13.w64[1], buf, dir, size, mod); break;
+ case 66: VG_(transfer) (&s390x->guest_v14.w64[1], buf, dir, size, mod); break;
+ case 67: VG_(transfer) (&s390x->guest_v15.w64[1], buf, dir, size, mod); break;
+ case 68: VG_(transfer) (&s390x->guest_v16, buf, dir, size, mod); break;
+ case 69: VG_(transfer) (&s390x->guest_v17, buf, dir, size, mod); break;
+ case 70: VG_(transfer) (&s390x->guest_v18, buf, dir, size, mod); break;
+ case 71: VG_(transfer) (&s390x->guest_v19, buf, dir, size, mod); break;
+ case 72: VG_(transfer) (&s390x->guest_v20, buf, dir, size, mod); break;
+ case 73: VG_(transfer) (&s390x->guest_v21, buf, dir, size, mod); break;
+ case 74: VG_(transfer) (&s390x->guest_v22, buf, dir, size, mod); break;
+ case 75: VG_(transfer) (&s390x->guest_v23, buf, dir, size, mod); break;
+ case 76: VG_(transfer) (&s390x->guest_v24, buf, dir, size, mod); break;
+ case 77: VG_(transfer) (&s390x->guest_v25, buf, dir, size, mod); break;
+ case 78: VG_(transfer) (&s390x->guest_v26, buf, dir, size, mod); break;
+ case 79: VG_(transfer) (&s390x->guest_v27, buf, dir, size, mod); break;
+ case 80: VG_(transfer) (&s390x->guest_v28, buf, dir, size, mod); break;
+ case 81: VG_(transfer) (&s390x->guest_v29, buf, dir, size, mod); break;
+ case 82: VG_(transfer) (&s390x->guest_v30, buf, dir, size, mod); break;
+ case 83: VG_(transfer) (&s390x->guest_v31, buf, dir, size, mod); break;
default: vg_assert(0);
}
}
static
+Bool have_vx (void)
+{
+ VexArch va;
+ VexArchInfo vai;
+ VG_(machine_get_VexArchInfo) (&va, &vai);
+ return (vai.hwcaps & VEX_HWCAPS_S390X_VX) != 0;
+}
+
+static
const char* target_xml (Bool shadow_mode)
{
if (shadow_mode) {
- return "s390x-generic-valgrind.xml";
+ if (have_vx())
+ return "s390x-vx-linux-valgrind.xml";
+ else
+ return "s390x-generic-valgrind.xml";
} else {
- return "s390x-generic.xml";
- }
+ if (have_vx())
+ return "s390x-vx-linux.xml";
+ else
+ return "s390x-generic.xml";
+ }
}
static CORE_ADDR** target_get_dtv (ThreadState *tst)
@@ -206,7 +286,7 @@ static CORE_ADDR** target_get_dtv (ThreadState *tst)
}
static struct valgrind_target_ops low_target = {
- num_regs,
+ -1, // Override at init time.
regs,
17, //sp = r15, which is register offset 17 in regs
transfer_register,
@@ -220,6 +300,11 @@ static struct valgrind_target_ops low_target = {
void s390x_init_architecture (struct valgrind_target_ops *target)
{
*target = low_target;
+ if (have_vx())
+ num_regs = num_regs_all;
+ else
+ num_regs = num_regs_all - 32; // Remove all VX registers.
+ target->num_regs = num_regs;
set_register_cache (regs, num_regs);
gdbserver_expedite_regs = expedite_regs;
}
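
have_vx() above keys off VEX's hwcaps. For comparison, here is a userspace sketch of the same check done via the auxiliary vector; the HWCAP_S390_VX name and bit position are an assumption taken from the kernel's asm/elf.h, not from this patch:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_S390_VX
#define HWCAP_S390_VX (1UL << 11)   /* assumed: vector-facility bit, per asm/elf.h */
#endif

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);
    printf("s390 vector facility: %s\n",
           (hwcap & HWCAP_S390_VX) ? "present" : "absent");
    return 0;
}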
diff -ru valgrind-3.14.0.orig/coregrind/Makefile.in valgrind-3.14.0/coregrind/Makefile.in
--- valgrind-3.14.0.orig/coregrind/Makefile.in 2018-11-20 17:30:03.075888111 +0100
+++ valgrind-3.14.0/coregrind/Makefile.in 2018-11-20 17:31:14.999314275 +0100
@@ -1869,6 +1869,11 @@
m_gdbserver/s390x-linux64-valgrind-s1.xml \
m_gdbserver/s390x-linux64-valgrind-s2.xml \
m_gdbserver/s390x-linux64.xml \
+ m_gdbserver/s390-vx-valgrind-s1.xml \
+ m_gdbserver/s390-vx-valgrind-s2.xml \
+ m_gdbserver/s390-vx.xml \
+ m_gdbserver/s390x-vx-linux-valgrind.xml \
+ m_gdbserver/s390x-vx-linux.xml \
m_gdbserver/mips-cp0-valgrind-s1.xml \
m_gdbserver/mips-cp0-valgrind-s2.xml \
m_gdbserver/mips-cp0.xml \

View File

@@ -0,0 +1,51 @@
From d10cd86ee32bf76495f79c02df62fc242adbcbe3 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2018 16:35:24 +0200
Subject: [PATCH] s390x: More fixes for z13 support
This patch addresses the following:
* Fix the implementation of LOCGHI. Previously Valgrind performed 32-bit
sign extension instead of 64-bit sign extension on the immediate value.
* Advertise VXRS in HWCAP. If no VXRS are advertised, but the program
uses vector registers, this could cause problems with a glibc built with
"-march=z13".
---
VEX/priv/guest_s390_toIR.c | 2 +-
coregrind/m_initimg/initimg-linux.c | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index 9c4d79b87..50a5a4177 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -16325,7 +16325,7 @@ static const HChar *
s390_irgen_LOCGHI(UChar r1, UChar m3, UShort i2, UChar unused)
{
next_insn_if(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)));
- put_gpr_dw0(r1, mkU64((UInt)(Int)(Short)i2));
+ put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
return "locghi";
}
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index 61cc458bc..8a7f0d024 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -699,9 +699,9 @@ Addr setup_client_stack( void* init_sp,
}
# elif defined(VGP_s390x_linux)
{
- /* Advertise hardware features "below" TE only. TE and VXRS
- (and anything above) are not supported by Valgrind. */
- auxv->u.a_val &= VKI_HWCAP_S390_TE - 1;
+ /* Advertise hardware features "below" TE and VXRS. TE itself
+ and anything above VXRS is not supported by Valgrind. */
+ auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS;
}
# elif defined(VGP_arm64_linux)
{
--
2.17.0

View File

@@ -0,0 +1,145 @@
commit dc1523fb3550b4ed9dd4c178741626daaa474da7
Author: Mark Wielaard <mark@klomp.org>
Date: Mon Dec 10 17:18:20 2018 +0100
PR386945 set_AV_CR6 patch
https://bugs.kde.org/show_bug.cgi?id=386945#c62
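
As context for the diff below, here is a scalar C sketch (illustrative only, not VEX IR) of the 64-bit-lane computation the rewritten set_AV_CR6 performs on the two halves of a 128-bit result:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t v0 = 0x0, v1 = 0x0;          /* the two 64-bit lanes */
    uint64_t ored  = v0 | v1;             /* overlappedOred  */
    uint64_t anded = v0 & v1;             /* overlappedAnded */

    int all_zeroes = (ored == 0);         /* rZeroes */
    int all_ones   = (~anded == 0);       /* rOnes   */

    /* CR6[0:3] = {all_ones, 0, all_zeroes, 0} */
    int cr6 = (all_ones << 3) | (all_zeroes << 1);
    printf("CR6 = %d\n", cr6);            /* 2 here: all zeroes, not all ones */
    return 0;
}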
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index ec2f90a..c3cc6d0 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -2062,45 +2062,88 @@ static void set_CR0 ( IRExpr* result )
static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
{
/* CR6[0:3] = {all_ones, 0, all_zeros, 0}
- all_ones = (v[0] && v[1] && v[2] && v[3])
- all_zeros = ~(v[0] || v[1] || v[2] || v[3])
+ 32 bit: all_zeros = (v[0] || v[1] || v[2] || v[3]) == 0x0000'0000
+ all_ones = ~(v[0] && v[1] && v[2] && v[3]) == 0x0000'0000
+ where v[] denotes 32-bit lanes
+ or
+ 64 bit: all_zeros = (v[0] || v[1]) == 0x0000'0000'0000'0000
+ all_ones = ~(v[0] && v[1]) == 0x0000'0000'0000'0000
+ where v[] denotes 64-bit lanes
+
+ The 32- and 64-bit versions compute the same thing, but the 64-bit one
+ tries to be a bit more efficient.
*/
- IRTemp v0 = newTemp(Ity_V128);
- IRTemp v1 = newTemp(Ity_V128);
- IRTemp v2 = newTemp(Ity_V128);
- IRTemp v3 = newTemp(Ity_V128);
- IRTemp rOnes = newTemp(Ity_I8);
- IRTemp rZeros = newTemp(Ity_I8);
-
vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
- assign( v0, result );
- assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
- assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
- assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
+ IRTemp overlappedOred = newTemp(Ity_V128);
+ IRTemp overlappedAnded = newTemp(Ity_V128);
+
+ if (mode64) {
+ IRTemp v0 = newTemp(Ity_V128);
+ IRTemp v1 = newTemp(Ity_V128);
+ assign( v0, result );
+ assign( v1, binop(Iop_ShrV128, result, mkU8(64)) );
+ assign(overlappedOred,
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)));
+ assign(overlappedAnded,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)));
+ } else {
+ IRTemp v0 = newTemp(Ity_V128);
+ IRTemp v1 = newTemp(Ity_V128);
+ IRTemp v2 = newTemp(Ity_V128);
+ IRTemp v3 = newTemp(Ity_V128);
+ assign( v0, result );
+ assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
+ assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
+ assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
+ assign(overlappedOred,
+ binop(Iop_OrV128,
+ binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))));
+ assign(overlappedAnded,
+ binop(Iop_AndV128,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))));
+ }
+
+ IRTemp rOnes = newTemp(Ity_I8);
+ IRTemp rZeroes = newTemp(Ity_I8);
- assign( rZeros, unop(Iop_1Uto8,
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
- unop(Iop_Not32,
- unop(Iop_V128to32,
- binop(Iop_OrV128,
- binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
- binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
- ))) );
+ if (mode64) {
+ assign(rZeroes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ64,
+ mkU64(0),
+ unop(Iop_V128to64, mkexpr(overlappedOred)))));
+ assign(rOnes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ64,
+ mkU64(0),
+ unop(Iop_Not64,
+ unop(Iop_V128to64, mkexpr(overlappedAnded))))));
+ } else {
+ assign(rZeroes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32,
+ mkU32(0),
+ unop(Iop_V128to32, mkexpr(overlappedOred)))));
+ assign(rOnes,
+ unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32,
+ mkU32(0),
+ unop(Iop_Not32,
+ unop(Iop_V128to32, mkexpr(overlappedAnded))))));
+ }
+
+ // rOnes might not be used below. But iropt will remove it, so there's no
+ // inefficiency as a result.
if (test_all_ones) {
- assign( rOnes, unop(Iop_1Uto8,
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
- unop(Iop_V128to32,
- binop(Iop_AndV128,
- binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
- binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
- ))) );
putCR321( 6, binop(Iop_Or8,
binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
- binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+ binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1))) );
} else {
- putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1)) );
}
putCR0( 6, mkU8(0) );
}
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index c24db91..7f69ee3 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -8322,6 +8322,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure,
# elif defined(VGA_amd64)
mce.dlbo.dl_Add64 = DLauto;
mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
+# elif defined(VGA_ppc64le)
+ // Needed by (at least) set_AV_CR6() in the front end.
+ mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
# endif
/* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then

View File

@@ -0,0 +1,244 @@
commit 0c701ba2a4b10a5f6f3fae31cb0ec6ca034d51d9
Author: Mark Wielaard <mark@klomp.org>
Date: Fri Dec 7 14:01:20 2018 +0100
Fix sigkill.stderr.exp for glibc-2.28.
glibc 2.28 filters out some bad signal numbers and returns
Invalid argument instead of passing such bad signal numbers
the kernel sigaction syscall. So we won't see such bad signal
numbers and won't print "bad signal number" ourselves.
Add a new memcheck/tests/sigkill.stderr.exp-glibc-2.28 to catch
this case.
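
A minimal sketch (assumed for illustration, not the actual testcase) of the behaviour the new expected-output file captures: with glibc 2.28, sigaction() itself rejects an out-of-range signal number with EINVAL, so neither the kernel nor Valgrind ever sees it:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

int main(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof sa);
    sa.sa_handler = SIG_IGN;

    /* 65 is above SIGRTMAX (64 on Linux); glibc 2.28 filters it out. */
    if (sigaction(65, &sa, NULL) != 0)
        printf("setting signal 65: %s\n", strerror(errno));
    return 0;
}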
diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
index 76e0e90..2af4dd1 100644
--- a/memcheck/tests/Makefile.am
+++ b/memcheck/tests/Makefile.am
@@ -260,7 +260,8 @@ EXTRA_DIST = \
sh-mem-random.stdout.exp sh-mem-random.vgtest \
sigaltstack.stderr.exp sigaltstack.vgtest \
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
- sigkill.stderr.exp-solaris sigkill.vgtest \
+ sigkill.stderr.exp-solaris \
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
static_malloc.stderr.exp static_malloc.vgtest \
diff --git a/memcheck/tests/sigkill.stderr.exp-glibc-2.28 b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
new file mode 100644
index 0000000..0e5f0cb
--- /dev/null
+++ b/memcheck/tests/sigkill.stderr.exp-glibc-2.28
@@ -0,0 +1,197 @@
+
+setting signal 1: Success
+getting signal 1: Success
+
+setting signal 2: Success
+getting signal 2: Success
+
+setting signal 3: Success
+getting signal 3: Success
+
+setting signal 4: Success
+getting signal 4: Success
+
+setting signal 5: Success
+getting signal 5: Success
+
+setting signal 6: Success
+getting signal 6: Success
+
+setting signal 7: Success
+getting signal 7: Success
+
+setting signal 8: Success
+getting signal 8: Success
+
+setting signal 9: Warning: ignored attempt to set SIGKILL handler in sigaction();
+ the SIGKILL signal is uncatchable
+Invalid argument
+getting signal 9: Success
+
+setting signal 10: Success
+getting signal 10: Success
+
+setting signal 11: Success
+getting signal 11: Success
+
+setting signal 12: Success
+getting signal 12: Success
+
+setting signal 13: Success
+getting signal 13: Success
+
+setting signal 14: Success
+getting signal 14: Success
+
+setting signal 15: Success
+getting signal 15: Success
+
+setting signal 16: Success
+getting signal 16: Success
+
+setting signal 17: Success
+getting signal 17: Success
+
+setting signal 18: Success
+getting signal 18: Success
+
+setting signal 19: Warning: ignored attempt to set SIGSTOP handler in sigaction();
+ the SIGSTOP signal is uncatchable
+Invalid argument
+getting signal 19: Success
+
+setting signal 20: Success
+getting signal 20: Success
+
+setting signal 21: Success
+getting signal 21: Success
+
+setting signal 22: Success
+getting signal 22: Success
+
+setting signal 23: Success
+getting signal 23: Success
+
+setting signal 24: Success
+getting signal 24: Success
+
+setting signal 25: Success
+getting signal 25: Success
+
+setting signal 26: Success
+getting signal 26: Success
+
+setting signal 27: Success
+getting signal 27: Success
+
+setting signal 28: Success
+getting signal 28: Success
+
+setting signal 29: Success
+getting signal 29: Success
+
+setting signal 30: Success
+getting signal 30: Success
+
+setting signal 31: Success
+getting signal 31: Success
+
+setting signal 34: Success
+getting signal 34: Success
+
+setting signal 35: Success
+getting signal 35: Success
+
+setting signal 36: Success
+getting signal 36: Success
+
+setting signal 37: Success
+getting signal 37: Success
+
+setting signal 38: Success
+getting signal 38: Success
+
+setting signal 39: Success
+getting signal 39: Success
+
+setting signal 40: Success
+getting signal 40: Success
+
+setting signal 41: Success
+getting signal 41: Success
+
+setting signal 42: Success
+getting signal 42: Success
+
+setting signal 43: Success
+getting signal 43: Success
+
+setting signal 44: Success
+getting signal 44: Success
+
+setting signal 45: Success
+getting signal 45: Success
+
+setting signal 46: Success
+getting signal 46: Success
+
+setting signal 47: Success
+getting signal 47: Success
+
+setting signal 48: Success
+getting signal 48: Success
+
+setting signal 49: Success
+getting signal 49: Success
+
+setting signal 50: Success
+getting signal 50: Success
+
+setting signal 51: Success
+getting signal 51: Success
+
+setting signal 52: Success
+getting signal 52: Success
+
+setting signal 53: Success
+getting signal 53: Success
+
+setting signal 54: Success
+getting signal 54: Success
+
+setting signal 55: Success
+getting signal 55: Success
+
+setting signal 56: Success
+getting signal 56: Success
+
+setting signal 57: Success
+getting signal 57: Success
+
+setting signal 58: Success
+getting signal 58: Success
+
+setting signal 59: Success
+getting signal 59: Success
+
+setting signal 60: Success
+getting signal 60: Success
+
+setting signal 61: Success
+getting signal 61: Success
+
+setting signal 62: Success
+getting signal 62: Success
+
+setting signal 65: Invalid argument
+getting signal 65: Invalid argument
+
+
+HEAP SUMMARY:
+ in use at exit: ... bytes in ... blocks
+ total heap usage: ... allocs, ... frees, ... bytes allocated
+
+For a detailed leak analysis, rerun with: --leak-check=full
+
+For counts of detected and suppressed errors, rerun with: -v
+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
diff -ur valgrind-3.14.0.orig/memcheck/tests/Makefile.in valgrind-3.14.0/memcheck/tests/Makefile.in
--- valgrind-3.14.0.orig/memcheck/tests/Makefile.in 2018-12-13 00:30:45.013839247 +0100
+++ valgrind-3.14.0/memcheck/tests/Makefile.in 2018-12-13 00:30:54.242636002 +0100
@@ -1573,7 +1573,8 @@
sh-mem-random.stdout.exp sh-mem-random.vgtest \
sigaltstack.stderr.exp sigaltstack.vgtest \
sigkill.stderr.exp sigkill.stderr.exp-darwin sigkill.stderr.exp-mips32 \
- sigkill.stderr.exp-solaris sigkill.vgtest \
+ sigkill.stderr.exp-solaris \
+ sigkill.stderr.exp-glibc-2.28 sigkill.vgtest \
signal2.stderr.exp signal2.stdout.exp signal2.vgtest \
sigprocmask.stderr.exp sigprocmask.stderr.exp2 sigprocmask.vgtest \
static_malloc.stderr.exp static_malloc.vgtest \

View File

@@ -0,0 +1,82 @@
commit cb5d7e047598bff6d0f1d707a70d9fb1a1c7f0e2
Author: Julian Seward <jseward@acm.org>
Date: Tue Nov 20 11:46:55 2018 +0100
VEX/priv/ir_opt.c
fold_Expr: transform PopCount64(And64(Add64(x,-1),Not64(x))) into CtzNat64(x).
This is part of the fix for bug 386945.
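
The identity behind this folding rule can be checked with a few lines of C (a sketch, not VEX code): (x-1) & ~x has ones exactly in the trailing-zero positions of x, so its population count equals the count of trailing zeros:

#include <stdio.h>
#include <stdint.h>

static int popcount64(uint64_t v)
{
    int n = 0;
    while (v) { v &= v - 1; n++; }  /* clear lowest set bit */
    return n;
}

int main(void)
{
    uint64_t x = 0x50;                 /* ...1010000, 4 trailing zeros */
    uint64_t mask = (x - 1) & ~x;      /* ones exactly below the lowest set bit */
    /* Note: __builtin_ctzll (GCC/Clang) is undefined at x == 0, whereas
       CtzNat64 is defined there (both sides of the identity give 64). */
    printf("popcount=%d ctz=%d\n", popcount64(mask), __builtin_ctzll(x));
    return 0;
}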
diff --git a/VEX/priv/ir_opt.c b/VEX/priv/ir_opt.c
index f40870b..23964be 100644
--- a/VEX/priv/ir_opt.c
+++ b/VEX/priv/ir_opt.c
@@ -1377,6 +1377,8 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
case Iex_Unop:
/* UNARY ops */
if (e->Iex.Unop.arg->tag == Iex_Const) {
+
+ /* cases where the arg is a const */
switch (e->Iex.Unop.op) {
case Iop_1Uto8:
e2 = IRExpr_Const(IRConst_U8(toUChar(
@@ -1690,8 +1692,56 @@ static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
default:
goto unhandled;
- }
- }
+ } // switch (e->Iex.Unop.op)
+
+ } else {
+
+ /* other cases (identities, etc) */
+ switch (e->Iex.Unop.op) {
+ case Iop_PopCount64: {
+ // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
+ // bindings:
+ // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
+ IRExpr* a1 = chase(env, e->Iex.Unop.arg);
+ if (!a1)
+ goto nomatch;
+ if (a1->tag != Iex_Binop || a1->Iex.Binop.op != Iop_And64)
+ goto nomatch;
+ // a1 is established
+ IRExpr* a11 = chase(env, a1->Iex.Binop.arg1);
+ if (!a11)
+ goto nomatch;
+ if (a11->tag != Iex_Binop || a11->Iex.Binop.op != Iop_Add64)
+ goto nomatch;
+ // a11 is established
+ IRExpr* a12 = chase(env, a1->Iex.Binop.arg2);
+ if (!a12)
+ goto nomatch;
+ if (a12->tag != Iex_Unop || a12->Iex.Unop.op != Iop_Not64)
+ goto nomatch;
+ // a12 is established
+ IRExpr* a111 = a11->Iex.Binop.arg1;
+ IRExpr* a112 = chase(env, a11->Iex.Binop.arg2);
+ IRExpr* a121 = a12->Iex.Unop.arg;
+ if (!a111 || !a112 || !a121)
+ goto nomatch;
+ // a111 and a121 need to be the same temp.
+ if (!eqIRAtom(a111, a121))
+ goto nomatch;
+ // Finally, a112 must be a 64-bit version of -1.
+ if (!isOnesU(a112))
+ goto nomatch;
+ // Match established. Transform.
+ e2 = IRExpr_Unop(Iop_CtzNat64, a111);
+ break;
+ nomatch:
+ break;
+ }
+ default:
+ break;
+ } // switch (e->Iex.Unop.op)
+
+ } // if (e->Iex.Unop.arg->tag == Iex_Const)
break;
case Iex_Binop:

View File

@@ -0,0 +1,98 @@
commit 262275da43425ba2b8c240e47063e36b39167996
Author: Mark Wielaard <mark@klomp.org>
Date: Wed Dec 12 13:55:01 2018 +0100
Fix memcheck/tests/undef_malloc_args testcase.
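
A hypothetical demonstration (not part of the patch) of why XORing the two arguments works as an "artificial use": if either pointer is undefined, the bits of the XOR are undefined too, so the branch condition itself becomes undefined and Memcheck reports it:

#include <stdlib.h>
#include <valgrind/memcheck.h>

int main(void)
{
    void *zone = NULL;
    void *p = malloc(16);

    VALGRIND_MAKE_MEM_UNDEFINED(&zone, sizeof zone);

    /* Branch on the XOR of both args: undefinedness in either one
       propagates into the tested value. Run under memcheck to see a
       "conditional jump depends on uninitialised value(s)" report. */
    if (((unsigned long)zone ^ (unsigned long)p) == 0)
        __asm__ __volatile__("" ::: "memory");

    free(p);
    return 0;
}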
diff --git a/coregrind/m_replacemalloc/vg_replace_malloc.c b/coregrind/m_replacemalloc/vg_replace_malloc.c
index 28bdb4a..564829a 100644
--- a/coregrind/m_replacemalloc/vg_replace_malloc.c
+++ b/coregrind/m_replacemalloc/vg_replace_malloc.c
@@ -216,9 +216,19 @@ static void init(void);
Apart from allowing memcheck to detect an error, the macro
TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED has no effect and
has a minimal cost for other tools replacing malloc functions.
+
+ Creating an "artificial" use of _x that works reliably is not entirely
+ straightforward. Simply comparing it against zero often produces no
+ warning if at least one nonzero bit of _x is defined, because
+ Memcheck knows that the result of the comparison will be defined (cf
+ expensiveCmpEQorNE).
+
+ Really we want to PCast _x, so as to create a value which is entirely
+ undefined if any bit of _x is undefined. But there's no portable way to do
+ that.
*/
-#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(x) \
- if ((ULong)x == 0) __asm__ __volatile__( "" ::: "memory" )
+#define TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(_x) \
+ if ((UWord)(_x) == 0) __asm__ __volatile__( "" ::: "memory" )
/*---------------------- malloc ----------------------*/
@@ -504,7 +514,7 @@ static void init(void);
void VG_REPLACE_FUNCTION_EZU(10040,soname,fnname) (void *zone, void *p) \
{ \
DO_INIT; \
- TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord) zone); \
+ TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED((UWord)zone ^ (UWord)p); \
MALLOC_TRACE(#fnname "(%p, %p)\n", zone, p ); \
if (p == NULL) \
return; \
diff --git a/memcheck/tests/undef_malloc_args.c b/memcheck/tests/undef_malloc_args.c
index 99e2799..654d70d 100644
--- a/memcheck/tests/undef_malloc_args.c
+++ b/memcheck/tests/undef_malloc_args.c
@@ -11,29 +11,29 @@ int main (int argc, char*argv[])
{
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
p = malloc(size);
}
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&p, sizeof(p));
new_p = realloc(p, def_size);
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
new_p = realloc(new_p, def_size);
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&new_p, sizeof(new_p));
free (new_p);
{
size_t nmemb = 1;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&nmemb, sizeof(nmemb));
new_p = calloc(nmemb, def_size);
free (new_p);
}
#if 0
{
size_t alignment = 1;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&alignment, sizeof(alignment));
new_p = memalign(alignment, def_size);
free(new_p);
}
@@ -41,14 +41,14 @@ int main (int argc, char*argv[])
{
size_t nmemb = 16;
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
new_p = memalign(nmemb, size);
free(new_p);
}
{
size_t size = def_size;
- (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, 1);
+ (void) VALGRIND_MAKE_MEM_UNDEFINED(&size, sizeof(size));
new_p = valloc(size);
free (new_p);
}

View File

@@ -0,0 +1,89 @@
commit 5fdabb72fdcba6bcf788eaa19c1ee557c13b8a7a
Author: Mark Wielaard <mark@klomp.org>
Date: Sat Dec 1 23:54:40 2018 +0100
Bug 401627 - Add wcsncmp override and testcase.
glibc 2.28 added an AVX2-optimized variant of wcsncmp which memcheck
cannot prove correct. Add a simple override in vg_replace_strmem.c.
diff --git a/memcheck/tests/wcs.c b/memcheck/tests/wcs.c
index 15730ad..538304b 100644
--- a/memcheck/tests/wcs.c
+++ b/memcheck/tests/wcs.c
@@ -1,5 +1,6 @@
-// Uses various wchar_t * functions that have hand written SSE assembly
-// implementations in glibc. wcslen, wcscpy, wcscmp, wcsrchr, wcschr.
+// Uses various wchar_t * functions that have hand written SSE and/or AVX2
+// assembly implementations in glibc.
+// wcslen, wcscpy, wcscmp, wcsncmp, wcsrchr, wcschr.
#include <stdio.h>
#include <stdlib.h>
@@ -18,6 +19,8 @@ int main(int argc, char **argv)
c = wcscpy (b, a);
fprintf (stderr, "wcscmp equal: %d\n", wcscmp (a, b)); // wcscmp equal: 0
+ fprintf (stderr,
+ "wcsncmp equal: %d\n", wcsncmp (a, b, l)); // wcsncmp equal: 0
d = wcsrchr (a, L'd');
e = wcschr (a, L'd');
diff --git a/memcheck/tests/wcs.stderr.exp b/memcheck/tests/wcs.stderr.exp
index 41d74c8..d5b5959 100644
--- a/memcheck/tests/wcs.stderr.exp
+++ b/memcheck/tests/wcs.stderr.exp
@@ -1,3 +1,4 @@
wcslen: 53
wcscmp equal: 0
+wcsncmp equal: 0
wcsrchr == wcschr: 1
diff --git a/shared/vg_replace_strmem.c b/shared/vg_replace_strmem.c
index d6927f0..89a7dcc 100644
--- a/shared/vg_replace_strmem.c
+++ b/shared/vg_replace_strmem.c
@@ -103,6 +103,7 @@
20420 STPNCPY
20430 WMEMCHR
20440 WCSNLEN
+ 20450 WSTRNCMP
*/
#if defined(VGO_solaris)
@@ -1927,6 +1928,36 @@ static inline void my_exit ( int x )
WCSCMP(VG_Z_LIBC_SONAME, wcscmp)
#endif
+/*---------------------- wcsncmp ----------------------*/
+
+// This is a wchar_t equivalent to strncmp. We don't
+// have wchar_t available here, but in the GNU C Library
+// wchar_t is always 32 bits wide and wcsncmp uses signed
+// comparison, not unsigned as in the strncmp function.
+
+#define WCSNCMP(soname, fnname) \
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
+ ( const Int* s1, const Int* s2, SizeT nmax ); \
+ int VG_REPLACE_FUNCTION_EZU(20450,soname,fnname) \
+ ( const Int* s1, const Int* s2, SizeT nmax ) \
+ { \
+ SizeT n = 0; \
+ while (True) { \
+ if (n >= nmax) return 0; \
+ if (*s1 == 0 && *s2 == 0) return 0; \
+ if (*s1 == 0) return -1; \
+ if (*s2 == 0) return 1; \
+ \
+ if (*s1 < *s2) return -1; \
+ if (*s1 > *s2) return 1; \
+ \
+ s1++; s2++; n++; \
+ } \
+ }
+#if defined(VGO_linux)
+ WCSNCMP(VG_Z_LIBC_SONAME, wcsncmp)
+#endif
+
/*---------------------- wcscpy ----------------------*/
// This is a wchar_t equivalent to strcpy. We don't

View File

@@ -0,0 +1,54 @@
--- valgrind-3.8.1/cachegrind/cg_sim.c.jj 2011-10-26 23:24:32.000000000 +0200
+++ valgrind-3.8.1/cachegrind/cg_sim.c 2011-12-09 17:31:19.256023683 +0100
@@ -42,27 +42,30 @@ typedef struct {
Int size; /* bytes */
Int assoc;
Int line_size; /* bytes */
- Int sets;
Int sets_min_1;
Int line_size_bits;
Int tag_shift;
- HChar desc_line[128]; /* large enough */
UWord* tags;
-} cache_t2;
+ HChar desc_line[128];
+} cache_t2
+#ifdef __GNUC__
+__attribute__ ((aligned (8 * sizeof (Int))))
+#endif
+;
/* By this point, the size/assoc/line_size has been checked. */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
- Int i;
+ Int sets;
c->size = config.size;
c->assoc = config.assoc;
c->line_size = config.line_size;
- c->sets = (c->size / c->line_size) / c->assoc;
- c->sets_min_1 = c->sets - 1;
+ sets = (c->size / c->line_size) / c->assoc;
+ c->sets_min_1 = sets - 1;
c->line_size_bits = VG_(log2)(c->line_size);
- c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
+ c->tag_shift = c->line_size_bits + VG_(log2)(sets);
if (c->assoc == 1) {
VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
@@ -72,11 +75,8 @@ static void cachesim_initcache(cache_t c
c->size, c->line_size, c->assoc);
}
- c->tags = VG_(malloc)("cg.sim.ci.1",
- sizeof(UWord) * c->sets * c->assoc);
-
- for (i = 0; i < c->sets * c->assoc; i++)
- c->tags[i] = 0;
+ c->tags = VG_(calloc)("cg.sim.ci.1",
+ sizeof(UWord), sets * c->assoc);
}
/* This attribute forces GCC to inline the function, getting rid of a
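
The restructuring above moves the cold desc_line to the end of cache_t2 and aligns the struct so the hot lookup fields stay within fewer cache lines. A minimal sketch of the same idea (field names borrowed from the patch, otherwise illustrative):

#include <stdio.h>

struct cache_hot {
    int sets_min_1;
    int line_size_bits;
    int tag_shift;
    unsigned long *tags;
    char desc_line[128];           /* cold data, moved to the end */
}
#ifdef __GNUC__
__attribute__ ((aligned (32)))     /* 8 * sizeof(int), as in the patch */
#endif
;

int main(void)
{
    printf("alignment = %zu\n", _Alignof(struct cache_hot));  /* >= 32 */
    return 0;
}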

View File

@@ -0,0 +1,15 @@
--- valgrind/glibc-2.34567-NPTL-helgrind.supp.jj 2009-08-19 15:37:48.000000000 +0200
+++ valgrind/glibc-2.34567-NPTL-helgrind.supp 2009-10-21 16:46:31.000000000 +0200
@@ -88,6 +88,12 @@
obj:*/lib*/libpthread-2.*so*
}
{
+ helgrind-glibc2X-102a
+ Helgrind:Race
+ fun:mythread_wrapper
+ obj:*vgpreload_helgrind*.so
+}
+{
helgrind-glibc2X-103
Helgrind:Race
fun:pthread_cond_*@@GLIBC_2.*

View File

@@ -0,0 +1,28 @@
--- valgrind/glibc-2.X.supp.in.jj 2011-10-26 23:24:45.000000000 +0200
+++ valgrind/glibc-2.X.supp.in 2012-05-07 10:55:20.395942656 +0200
@@ -124,7 +124,7 @@
glibc-2.5.x-on-SUSE-10.2-(PPC)-2a
Memcheck:Cond
fun:index
- obj:*ld-@GLIBC_VERSION@.*.so
+ obj:*ld-@GLIBC_VERSION@*.so
}
{
glibc-2.5.x-on-SuSE-10.2-(PPC)-2b
@@ -136,14 +136,14 @@
glibc-2.5.5-on-SuSE-10.2-(PPC)-2c
Memcheck:Addr4
fun:index
- obj:*ld-@GLIBC_VERSION@.*.so
+ obj:*ld-@GLIBC_VERSION@*.so
}
{
glibc-2.3.5-on-SuSE-10.1-(PPC)-3
Memcheck:Addr4
fun:*wordcopy_fwd_dest_aligned*
fun:mem*cpy
- obj:*lib*@GLIBC_VERSION@.*.so
+ obj:*lib*@GLIBC_VERSION@*.so
}
{

1844
SPECS/valgrind.spec Normal file

File diff suppressed because it is too large