From d3173fc08b0d01883e209038e1e78ec83c53ebff Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Thu, 5 Sep 2013 16:56:43 +0200 Subject: [PATCH] Add valgrind-3.8.1-mmxext.patch. --- valgrind-3.8.1-mmxext.patch | 975 ++++++++++++++++++++++++++++++++++++ valgrind.spec | 5 + 2 files changed, 980 insertions(+) create mode 100644 valgrind-3.8.1-mmxext.patch diff --git a/valgrind-3.8.1-mmxext.patch b/valgrind-3.8.1-mmxext.patch new file mode 100644 index 0000000..5c88907 --- /dev/null +++ b/valgrind-3.8.1-mmxext.patch @@ -0,0 +1,975 @@ +commit a4b7b67db47021c424c18a5729f250016d34df27 +Author: mjw +Date: Tue Aug 27 10:19:03 2013 +0000 + + Support mmxext (integer sse) subset on i386 (athlon). + + Some processors like the AMD Athlon "Classic" support mmxext, + a sse1 subset. This subset is not properly detected by VEX. + The subset uses the same encoding as the sse1 instructions. + + The subset is described at: + http://support.amd.com/us/Embedded_TechDocs/22466.pdf + https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions + + This introduces a new VEX_HWCAPS_X86_MMXEXT that sits between + the baseline (0) and VEX_HWCAPS_X86_SSE1. There is also a new + x86g_dirtyhelper_CPUID_mmxext to mimic an Athlon "Classic" + (Model 2, K75 "Pluto/Orion"). + + Groups all mmxext instructions together in one block. 
+ + git-svn-id: svn://svn.valgrind.org/vex/trunk@2745 8f6e269a-dfd6-0310-a8e1-e2731360e62c + +diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h +index 389e6bb..1a16a0b 100644 +--- a/VEX/priv/guest_x86_defs.h ++++ b/VEX/priv/guest_x86_defs.h +@@ -144,6 +144,7 @@ extern ULong x86g_dirtyhelper_loadF80le ( UInt ); + extern void x86g_dirtyhelper_storeF80le ( UInt, ULong ); + + extern void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* ); ++extern void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* ); + extern void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* ); + extern void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* ); + +diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c +index 9c26794..e87e89f 100644 +--- a/VEX/priv/guest_x86_helpers.c ++++ b/VEX/priv/guest_x86_helpers.c +@@ -2207,6 +2207,63 @@ void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st ) + + /* CALLED FROM GENERATED CODE */ + /* DIRTY HELPER (modifies guest state) */ ++/* Claim to be a Athlon "Classic" (Model 2, K75 "Pluto/Orion") */ ++/* But without 3DNow support (weird, but we really don't support it). */ ++void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st ) ++{ ++ switch (st->guest_EAX) { ++ /* vendor ID */ ++ case 0: ++ st->guest_EAX = 0x1; ++ st->guest_EBX = 0x68747541; ++ st->guest_ECX = 0x444d4163; ++ st->guest_EDX = 0x69746e65; ++ break; ++ /* feature bits */ ++ case 1: ++ st->guest_EAX = 0x621; ++ st->guest_EBX = 0x0; ++ st->guest_ECX = 0x0; ++ st->guest_EDX = 0x183f9ff; ++ break; ++ /* Highest Extended Function Supported (0x80000004 brand string) */ ++ case 0x80000000: ++ st->guest_EAX = 0x80000004; ++ st->guest_EBX = 0x68747541; ++ st->guest_ECX = 0x444d4163; ++ st->guest_EDX = 0x69746e65; ++ break; ++ /* Extended Processor Info and Feature Bits */ ++ case 0x80000001: ++ st->guest_EAX = 0x721; ++ st->guest_EBX = 0x0; ++ st->guest_ECX = 0x0; ++ st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. 
*/ ++ break; ++ /* Processor Brand String "AMD Athlon(tm) Processor" */ ++ case 0x80000002: ++ st->guest_EAX = 0x20444d41; ++ st->guest_EBX = 0x6c687441; ++ st->guest_ECX = 0x74286e6f; ++ st->guest_EDX = 0x5020296d; ++ break; ++ case 0x80000003: ++ st->guest_EAX = 0x65636f72; ++ st->guest_EBX = 0x726f7373; ++ st->guest_ECX = 0x0; ++ st->guest_EDX = 0x0; ++ break; ++ default: ++ st->guest_EAX = 0x0; ++ st->guest_EBX = 0x0; ++ st->guest_ECX = 0x0; ++ st->guest_EDX = 0x0; ++ break; ++ } ++} ++ ++/* CALLED FROM GENERATED CODE */ ++/* DIRTY HELPER (modifies guest state) */ + /* Claim to be the following SSE1-capable CPU: + vendor_id : GenuineIntel + cpu family : 6 +diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c +index 90499b0..e98f19c 100644 +--- a/VEX/priv/guest_x86_toIR.c ++++ b/VEX/priv/guest_x86_toIR.c +@@ -8318,7 +8318,18 @@ DisResult disInstr_X86_WRK ( + guest subarchitecture. */ + if (archinfo->hwcaps == 0/*baseline, no sse at all*/) + goto after_sse_decoders; +- ++ ++ /* With mmxext only some extended MMX instructions are recognized. ++ The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW ++ PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB ++ PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE ++ ++ http://support.amd.com/us/Embedded_TechDocs/22466.pdf ++ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */ ++ ++ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/) ++ goto mmxext; ++ + /* Otherwise we must be doing sse1 or sse2, so we can at least try + for SSE1 here. */ + +@@ -8627,6 +8638,11 @@ DisResult disInstr_X86_WRK ( + goto decode_success; + } + ++ ++ /* mmxext sse1 subset starts here. mmxext only arches will parse ++ only this subset of the sse1 instructions. 
*/ ++ mmxext: ++ + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F7 = MASKMOVQ -- 8x8 masked store */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { +@@ -8637,203 +8653,6 @@ DisResult disInstr_X86_WRK ( + goto decode_success; + } + +- /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { +- delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); +- goto decode_success; +- } +- +- /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ +- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { +- vassert(sz == 4); +- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); +- goto decode_success; +- } +- +- /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { +- delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); +- goto decode_success; +- } +- +- /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ +- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { +- vassert(sz == 4); +- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); +- goto decode_success; +- } +- +- /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ +- /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). 
*/ +- if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { +- modrm = getIByte(delta+2); +- if (epartIsReg(modrm)) { +- putXMMReg( gregOfRM(modrm), +- getXMMReg( eregOfRM(modrm) )); +- DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), +- nameXMMReg(gregOfRM(modrm))); +- delta += 2+1; +- } else { +- addr = disAMode ( &alen, sorb, delta+2, dis_buf ); +- if (insn[1] == 0x28/*movaps*/) +- gen_SEGV_if_not_16_aligned( addr ); +- putXMMReg( gregOfRM(modrm), +- loadLE(Ity_V128, mkexpr(addr)) ); +- DIP("mov[ua]ps %s,%s\n", dis_buf, +- nameXMMReg(gregOfRM(modrm))); +- delta += 2+alen; +- } +- goto decode_success; +- } +- +- /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ +- /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */ +- if (sz == 4 && insn[0] == 0x0F +- && (insn[1] == 0x29 || insn[1] == 0x11)) { +- modrm = getIByte(delta+2); +- if (epartIsReg(modrm)) { +- /* fall through; awaiting test case */ +- } else { +- addr = disAMode ( &alen, sorb, delta+2, dis_buf ); +- if (insn[1] == 0x29/*movaps*/) +- gen_SEGV_if_not_16_aligned( addr ); +- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); +- DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)), +- dis_buf ); +- delta += 2+alen; +- goto decode_success; +- } +- } +- +- /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ +- /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. 
*/ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) { +- modrm = getIByte(delta+2); +- if (epartIsReg(modrm)) { +- delta += 2+1; +- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, +- getXMMRegLane64( eregOfRM(modrm), 0 ) ); +- DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), +- nameXMMReg(gregOfRM(modrm))); +- } else { +- addr = disAMode ( &alen, sorb, delta+2, dis_buf ); +- delta += 2+alen; +- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, +- loadLE(Ity_I64, mkexpr(addr)) ); +- DIP("movhps %s,%s\n", dis_buf, +- nameXMMReg( gregOfRM(modrm) )); +- } +- goto decode_success; +- } +- +- /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) { +- if (!epartIsReg(insn[2])) { +- delta += 2; +- addr = disAMode ( &alen, sorb, delta, dis_buf ); +- delta += alen; +- storeLE( mkexpr(addr), +- getXMMRegLane64( gregOfRM(insn[2]), +- 1/*upper lane*/ ) ); +- DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), +- dis_buf); +- goto decode_success; +- } +- /* else fall through */ +- } +- +- /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ +- /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) { +- modrm = getIByte(delta+2); +- if (epartIsReg(modrm)) { +- delta += 2+1; +- putXMMRegLane64( gregOfRM(modrm), +- 0/*lower lane*/, +- getXMMRegLane64( eregOfRM(modrm), 1 )); +- DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)), +- nameXMMReg(gregOfRM(modrm))); +- } else { +- addr = disAMode ( &alen, sorb, delta+2, dis_buf ); +- delta += 2+alen; +- putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, +- loadLE(Ity_I64, mkexpr(addr)) ); +- DIP("movlps %s, %s\n", +- dis_buf, nameXMMReg( gregOfRM(modrm) )); +- } +- goto decode_success; +- } +- +- /* 0F 13 = MOVLPS -- move from low half of XMM to mem. 
*/ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { +- if (!epartIsReg(insn[2])) { +- delta += 2; +- addr = disAMode ( &alen, sorb, delta, dis_buf ); +- delta += alen; +- storeLE( mkexpr(addr), +- getXMMRegLane64( gregOfRM(insn[2]), +- 0/*lower lane*/ ) ); +- DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), +- dis_buf); +- goto decode_success; +- } +- /* else fall through */ +- } +- +- /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) +- to 4 lowest bits of ireg(G) */ +- if (insn[0] == 0x0F && insn[1] == 0x50) { +- modrm = getIByte(delta+2); +- if (sz == 4 && epartIsReg(modrm)) { +- Int src; +- t0 = newTemp(Ity_I32); +- t1 = newTemp(Ity_I32); +- t2 = newTemp(Ity_I32); +- t3 = newTemp(Ity_I32); +- delta += 2+1; +- src = eregOfRM(modrm); +- assign( t0, binop( Iop_And32, +- binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), +- mkU32(1) )); +- assign( t1, binop( Iop_And32, +- binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), +- mkU32(2) )); +- assign( t2, binop( Iop_And32, +- binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), +- mkU32(4) )); +- assign( t3, binop( Iop_And32, +- binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), +- mkU32(8) )); +- putIReg(4, gregOfRM(modrm), +- binop(Iop_Or32, +- binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), +- binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) +- ) +- ); +- DIP("movmskps %s,%s\n", nameXMMReg(src), +- nameIReg(4, gregOfRM(modrm))); +- goto decode_success; +- } +- /* else fall through */ +- } +- +- /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ +- /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ +- if (insn[0] == 0x0F && insn[1] == 0x2B) { +- modrm = getIByte(delta+2); +- if (!epartIsReg(modrm)) { +- addr = disAMode ( &alen, sorb, delta+2, dis_buf ); +- gen_SEGV_if_not_16_aligned( addr ); +- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); +- DIP("movntp%s %s,%s\n", sz==2 ? 
"d" : "s", +- dis_buf, +- nameXMMReg(gregOfRM(modrm))); +- delta += 2+alen; +- goto decode_success; +- } +- /* else fall through */ +- } +- + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the + Intel manual does not say anything about the usual business of +@@ -8854,70 +8673,6 @@ DisResult disInstr_X86_WRK ( + /* else fall through */ + } + +- /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G +- (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ +- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) { +- vassert(sz == 4); +- modrm = getIByte(delta+3); +- if (epartIsReg(modrm)) { +- putXMMRegLane32( gregOfRM(modrm), 0, +- getXMMRegLane32( eregOfRM(modrm), 0 )); +- DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)), +- nameXMMReg(gregOfRM(modrm))); +- delta += 3+1; +- } else { +- addr = disAMode ( &alen, sorb, delta+3, dis_buf ); +- /* zero bits 127:64 */ +- putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); +- /* zero bits 63:32 */ +- putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) ); +- /* write bits 31:0 */ +- putXMMRegLane32( gregOfRM(modrm), 0, +- loadLE(Ity_I32, mkexpr(addr)) ); +- DIP("movss %s,%s\n", dis_buf, +- nameXMMReg(gregOfRM(modrm))); +- delta += 3+alen; +- } +- goto decode_success; +- } +- +- /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem +- or lo 1/4 xmm). 
*/ +- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) { +- vassert(sz == 4); +- modrm = getIByte(delta+3); +- if (epartIsReg(modrm)) { +- /* fall through, we don't yet have a test case */ +- } else { +- addr = disAMode ( &alen, sorb, delta+3, dis_buf ); +- storeLE( mkexpr(addr), +- getXMMRegLane32(gregOfRM(modrm), 0) ); +- DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)), +- dis_buf); +- delta += 3+alen; +- goto decode_success; +- } +- } +- +- /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) { +- delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 ); +- goto decode_success; +- } +- +- /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ +- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) { +- vassert(sz == 4); +- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 ); +- goto decode_success; +- } +- +- /* 0F 56 = ORPS -- G = G and E */ +- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) { +- delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 ); +- goto decode_success; +- } +- + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) { +@@ -9173,6 +8928,284 @@ DisResult disInstr_X86_WRK ( + goto decode_success; + } + ++ /* 0F AE /7 = SFENCE -- flush pending operations to memory */ ++ if (insn[0] == 0x0F && insn[1] == 0xAE ++ && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { ++ vassert(sz == 4); ++ delta += 3; ++ /* Insert a memory fence. It's sometimes important that these ++ are carried through to the generated code. */ ++ stmt( IRStmt_MBE(Imbe_Fence) ); ++ DIP("sfence\n"); ++ goto decode_success; ++ } ++ ++ /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. 
*/ ++ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/) ++ goto after_sse_decoders; ++ ++ ++ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { ++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); ++ goto decode_success; ++ } ++ ++ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */ ++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) { ++ vassert(sz == 4); ++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 ); ++ goto decode_success; ++ } ++ ++ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) { ++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 ); ++ goto decode_success; ++ } ++ ++ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */ ++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) { ++ vassert(sz == 4); ++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 ); ++ goto decode_success; ++ } ++ ++ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */ ++ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */ ++ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) { ++ modrm = getIByte(delta+2); ++ if (epartIsReg(modrm)) { ++ putXMMReg( gregOfRM(modrm), ++ getXMMReg( eregOfRM(modrm) )); ++ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)), ++ nameXMMReg(gregOfRM(modrm))); ++ delta += 2+1; ++ } else { ++ addr = disAMode ( &alen, sorb, delta+2, dis_buf ); ++ if (insn[1] == 0x28/*movaps*/) ++ gen_SEGV_if_not_16_aligned( addr ); ++ putXMMReg( gregOfRM(modrm), ++ loadLE(Ity_V128, mkexpr(addr)) ); ++ DIP("mov[ua]ps %s,%s\n", dis_buf, ++ nameXMMReg(gregOfRM(modrm))); ++ delta += 2+alen; ++ } ++ goto decode_success; ++ } ++ ++ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */ ++ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). 
*/ ++ if (sz == 4 && insn[0] == 0x0F ++ && (insn[1] == 0x29 || insn[1] == 0x11)) { ++ modrm = getIByte(delta+2); ++ if (epartIsReg(modrm)) { ++ /* fall through; awaiting test case */ ++ } else { ++ addr = disAMode ( &alen, sorb, delta+2, dis_buf ); ++ if (insn[1] == 0x29/*movaps*/) ++ gen_SEGV_if_not_16_aligned( addr ); ++ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); ++ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)), ++ dis_buf ); ++ delta += 2+alen; ++ goto decode_success; ++ } ++ } ++ ++ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */ ++ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) { ++ modrm = getIByte(delta+2); ++ if (epartIsReg(modrm)) { ++ delta += 2+1; ++ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, ++ getXMMRegLane64( eregOfRM(modrm), 0 ) ); ++ DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)), ++ nameXMMReg(gregOfRM(modrm))); ++ } else { ++ addr = disAMode ( &alen, sorb, delta+2, dis_buf ); ++ delta += 2+alen; ++ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/, ++ loadLE(Ity_I64, mkexpr(addr)) ); ++ DIP("movhps %s,%s\n", dis_buf, ++ nameXMMReg( gregOfRM(modrm) )); ++ } ++ goto decode_success; ++ } ++ ++ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) { ++ if (!epartIsReg(insn[2])) { ++ delta += 2; ++ addr = disAMode ( &alen, sorb, delta, dis_buf ); ++ delta += alen; ++ storeLE( mkexpr(addr), ++ getXMMRegLane64( gregOfRM(insn[2]), ++ 1/*upper lane*/ ) ); ++ DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ), ++ dis_buf); ++ goto decode_success; ++ } ++ /* else fall through */ ++ } ++ ++ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */ ++ /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. 
*/ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) { ++ modrm = getIByte(delta+2); ++ if (epartIsReg(modrm)) { ++ delta += 2+1; ++ putXMMRegLane64( gregOfRM(modrm), ++ 0/*lower lane*/, ++ getXMMRegLane64( eregOfRM(modrm), 1 )); ++ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)), ++ nameXMMReg(gregOfRM(modrm))); ++ } else { ++ addr = disAMode ( &alen, sorb, delta+2, dis_buf ); ++ delta += 2+alen; ++ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/, ++ loadLE(Ity_I64, mkexpr(addr)) ); ++ DIP("movlps %s, %s\n", ++ dis_buf, nameXMMReg( gregOfRM(modrm) )); ++ } ++ goto decode_success; ++ } ++ ++ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) { ++ if (!epartIsReg(insn[2])) { ++ delta += 2; ++ addr = disAMode ( &alen, sorb, delta, dis_buf ); ++ delta += alen; ++ storeLE( mkexpr(addr), ++ getXMMRegLane64( gregOfRM(insn[2]), ++ 0/*lower lane*/ ) ); ++ DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ), ++ dis_buf); ++ goto decode_success; ++ } ++ /* else fall through */ ++ } ++ ++ /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E) ++ to 4 lowest bits of ireg(G) */ ++ if (insn[0] == 0x0F && insn[1] == 0x50) { ++ modrm = getIByte(delta+2); ++ if (sz == 4 && epartIsReg(modrm)) { ++ Int src; ++ t0 = newTemp(Ity_I32); ++ t1 = newTemp(Ity_I32); ++ t2 = newTemp(Ity_I32); ++ t3 = newTemp(Ity_I32); ++ delta += 2+1; ++ src = eregOfRM(modrm); ++ assign( t0, binop( Iop_And32, ++ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)), ++ mkU32(1) )); ++ assign( t1, binop( Iop_And32, ++ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)), ++ mkU32(2) )); ++ assign( t2, binop( Iop_And32, ++ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)), ++ mkU32(4) )); ++ assign( t3, binop( Iop_And32, ++ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)), ++ mkU32(8) )); ++ putIReg(4, gregOfRM(modrm), ++ binop(Iop_Or32, ++ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)), ++ binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ++ ) ++ ); 
++ DIP("movmskps %s,%s\n", nameXMMReg(src), ++ nameIReg(4, gregOfRM(modrm))); ++ goto decode_success; ++ } ++ /* else fall through */ ++ } ++ ++ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */ ++ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */ ++ if (insn[0] == 0x0F && insn[1] == 0x2B) { ++ modrm = getIByte(delta+2); ++ if (!epartIsReg(modrm)) { ++ addr = disAMode ( &alen, sorb, delta+2, dis_buf ); ++ gen_SEGV_if_not_16_aligned( addr ); ++ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) ); ++ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s", ++ dis_buf, ++ nameXMMReg(gregOfRM(modrm))); ++ delta += 2+alen; ++ goto decode_success; ++ } ++ /* else fall through */ ++ } ++ ++ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G ++ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */ ++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) { ++ vassert(sz == 4); ++ modrm = getIByte(delta+3); ++ if (epartIsReg(modrm)) { ++ putXMMRegLane32( gregOfRM(modrm), 0, ++ getXMMRegLane32( eregOfRM(modrm), 0 )); ++ DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)), ++ nameXMMReg(gregOfRM(modrm))); ++ delta += 3+1; ++ } else { ++ addr = disAMode ( &alen, sorb, delta+3, dis_buf ); ++ /* zero bits 127:64 */ ++ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) ); ++ /* zero bits 63:32 */ ++ putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) ); ++ /* write bits 31:0 */ ++ putXMMRegLane32( gregOfRM(modrm), 0, ++ loadLE(Ity_I32, mkexpr(addr)) ); ++ DIP("movss %s,%s\n", dis_buf, ++ nameXMMReg(gregOfRM(modrm))); ++ delta += 3+alen; ++ } ++ goto decode_success; ++ } ++ ++ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem ++ or lo 1/4 xmm). 
*/ ++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) { ++ vassert(sz == 4); ++ modrm = getIByte(delta+3); ++ if (epartIsReg(modrm)) { ++ /* fall through, we don't yet have a test case */ ++ } else { ++ addr = disAMode ( &alen, sorb, delta+3, dis_buf ); ++ storeLE( mkexpr(addr), ++ getXMMRegLane32(gregOfRM(modrm), 0) ); ++ DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)), ++ dis_buf); ++ delta += 3+alen; ++ goto decode_success; ++ } ++ } ++ ++ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) { ++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 ); ++ goto decode_success; ++ } ++ ++ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */ ++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) { ++ vassert(sz == 4); ++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 ); ++ goto decode_success; ++ } ++ ++ /* 0F 56 = ORPS -- G = G and E */ ++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) { ++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 ); ++ goto decode_success; ++ } ++ + /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */ + if (insn[0] == 0x0F && insn[1] == 0x53) { + vassert(sz == 4); +@@ -9205,18 +9238,6 @@ DisResult disInstr_X86_WRK ( + goto decode_success; + } + +- /* 0F AE /7 = SFENCE -- flush pending operations to memory */ +- if (insn[0] == 0x0F && insn[1] == 0xAE +- && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) { +- vassert(sz == 4); +- delta += 3; +- /* Insert a memory fence. It's sometimes important that these +- are carried through to the generated code. 
*/ +- stmt( IRStmt_MBE(Imbe_Fence) ); +- DIP("sfence\n"); +- goto decode_success; +- } +- + /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) { + Int select; +@@ -14674,6 +14695,11 @@ DisResult disInstr_X86_WRK ( + fAddr = &x86g_dirtyhelper_CPUID_sse1; + } + else ++ if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) { ++ fName = "x86g_dirtyhelper_CPUID_mmxext"; ++ fAddr = &x86g_dirtyhelper_CPUID_mmxext; ++ } ++ else + if (archinfo->hwcaps == 0/*no SSE*/) { + fName = "x86g_dirtyhelper_CPUID_sse0"; + fAddr = &x86g_dirtyhelper_CPUID_sse0; +diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c +index 21a05a9..693eaa2 100644 +--- a/VEX/priv/host_x86_defs.c ++++ b/VEX/priv/host_x86_defs.c +@@ -727,7 +727,8 @@ X86Instr* X86Instr_MFence ( UInt hwcaps ) { + X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); + i->tag = Xin_MFence; + i->Xin.MFence.hwcaps = hwcaps; +- vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1 ++ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT ++ |VEX_HWCAPS_X86_SSE1 + |VEX_HWCAPS_X86_SSE2 + |VEX_HWCAPS_X86_SSE3 + |VEX_HWCAPS_X86_LZCNT))); +@@ -2695,7 +2696,7 @@ Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, + *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0; + goto done; + } +- if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) { ++ if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) { + /* sfence */ + *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8; + /* lock addl $0,0(%esp) */ +diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h +index f810ab4..e03becf 100644 +--- a/VEX/priv/host_x86_defs.h ++++ b/VEX/priv/host_x86_defs.h +@@ -360,7 +360,7 @@ typedef + Xin_Store, /* store 16/8 bit value in memory */ + Xin_Set32, /* convert condition code to 32-bit value */ + Xin_Bsfr32, /* 32-bit bsf/bsr */ +- Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */ ++ Xin_MFence, /* mem fence (not just sse2, but sse0 and 1/mmxext too) */ + Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */ + Xin_DACAS, /* lock;cmpxchg8b (doubleword 
ACAS, 2 x 32-bit only) */ + +@@ -508,13 +508,13 @@ typedef + HReg src; + HReg dst; + } Bsfr32; +- /* Mem fence (not just sse2, but sse0 and 1 too). In short, +- an insn which flushes all preceding loads and stores as +- much as possible before continuing. On SSE2 we emit a +- real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and +- on SSE0 "lock addl $0,0(%esp)". This insn therefore +- carries the host's hwcaps so the assembler knows what to +- emit. */ ++ /* Mem fence (not just sse2, but sse0 and sse1/mmxext too). ++ In short, an insn which flushes all preceding loads and ++ stores as much as possible before continuing. On SSE2 ++ we emit a real "mfence", on SSE1 or the MMXEXT subset ++ "sfence ; lock addl $0,0(%esp)" and on SSE0 ++ "lock addl $0,0(%esp)". This insn therefore carries the ++ host's hwcaps so the assembler knows what to emit. */ + struct { + UInt hwcaps; + } MFence; +diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c +index 086aefc..90bc563 100644 +--- a/VEX/priv/host_x86_isel.c ++++ b/VEX/priv/host_x86_isel.c +@@ -3251,7 +3251,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) + { + + # define REQUIRE_SSE1 \ +- do { if (env->hwcaps == 0/*baseline, no sse*/) \ ++ do { if (env->hwcaps == 0/*baseline, no sse*/ \ ++ || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \ + goto vec_fail; \ + } while (0) + +@@ -4388,7 +4389,8 @@ HInstrArray* iselSB_X86 ( IRSB* bb, + /* sanity ... */ + vassert(arch_host == VexArchX86); + vassert(0 == (hwcaps_host +- & ~(VEX_HWCAPS_X86_SSE1 ++ & ~(VEX_HWCAPS_X86_MMXEXT ++ | VEX_HWCAPS_X86_SSE1 + | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3 + | VEX_HWCAPS_X86_LZCNT))); +diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c +index e425950..5bb762f 100644 +--- a/VEX/priv/main_main.c ++++ b/VEX/priv/main_main.c +@@ -1086,23 +1086,25 @@ + + static HChar* show_hwcaps_x86 ( UInt hwcaps ) + { +- /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. 
*/ ++ /* Monotonic, LZCNT > SSE3 > SSE2 > SSE1 > MMXEXT > baseline. */ + switch (hwcaps) { + case 0: + return "x86-sse0"; +- case VEX_HWCAPS_X86_SSE1: +- return "x86-sse1"; +- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2: +- return "x86-sse1-sse2"; +- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 ++ case VEX_HWCAPS_X86_MMXEXT: ++ return "x86-mmxext"; ++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1: ++ return "x86-mmxext-sse1"; ++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2: ++ return "x86-mmxext-sse1-sse2"; ++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_LZCNT: +- return "x86-sse1-sse2-lzcnt"; +- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 ++ return "x86-mmxext-sse1-sse2-lzcnt"; ++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3: +- return "x86-sse1-sse2-sse3"; +- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 ++ return "x86-mmxext-sse1-sse2-sse3"; ++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT: +- return "x86-sse1-sse2-sse3-lzcnt"; ++ return "x86-mmxext-sse1-sse2-sse3-lzcnt"; + default: + return NULL; + } +diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h +index 4b36727..c8b5892 100644 +--- a/VEX/pub/libvex.h ++++ b/VEX/pub/libvex.h +@@ -71,11 +71,12 @@ typedef + combinations. */ + + /* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with +- cmpxchg8b. */ +-#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */ +-#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */ +-#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */ +-#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */ ++ cmpxchg8b. MMXEXT is a special AMD only subset of SSE1 (Integer SSE). 
*/ ++#define VEX_HWCAPS_X86_MMXEXT (1<<1) /* A subset of SSE1 on early AMD */ ++#define VEX_HWCAPS_X86_SSE1 (1<<2) /* SSE1 support (Pentium III) */ ++#define VEX_HWCAPS_X86_SSE2 (1<<3) /* SSE2 support (Pentium 4) */ ++#define VEX_HWCAPS_X86_SSE3 (1<<4) /* SSE3 support (>= Prescott) */ ++#define VEX_HWCAPS_X86_LZCNT (1<<5) /* SSE4a LZCNT insn */ + + /* amd64: baseline capability is SSE2, with cmpxchg8b but not + cmpxchg16b. */ +commit 4c6f0638553e69b7f70c17a64a8f60114d6f6230 +Author: mjw +Date: Tue Aug 27 10:23:23 2013 +0000 + + Support mmxext (integer sse) subset on i386 (athlon). Bug #323713 + + Some processors like the AMD Athlon "Classic" support mmxext, + a sse1 subset. This subset is not properly detected by VEX. + The subset uses the same encoding as the sse1 instructions. + + The subset is described at: + http://support.amd.com/us/Embedded_TechDocs/22466.pdf + https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions + + Detects mmxext subset from cpuid information (and enables it + when full sse1 is found). Also fixes the prereq of + none/tests/x86/insn_mmxext.vgtest so that it also runs when + full sse1 (and not just the mmxext subset) is found. + It already passed on such configurations. With the VEX patch + (r2745) it also passes with just the mmxext subset. + + git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13515 a5019735-40e9-0310-863c-91ae7b9d1cf9 + +diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c +index 353c05b..2fd5f07 100644 +--- a/coregrind/m_machine.c ++++ b/coregrind/m_machine.c +@@ -685,7 +685,7 @@ + LibVEX_default_VexArchInfo(&vai); + + #if defined(VGA_x86) +- { Bool have_sse1, have_sse2, have_cx8, have_lzcnt; ++ { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext; + UInt eax, ebx, ecx, edx, max_extended; + UChar vstr[13]; + vstr[0] = 0; +@@ -722,17 +722,27 @@ + if (!have_cx8) + return False; + +- /* Figure out if this is an AMD that can do LZCNT. */ ++ /* Figure out if this is an AMD that can do mmxext and/or LZCNT. 
*/ ++ have_mmxext = False; + have_lzcnt = False; + if (0 == VG_(strcmp)(vstr, "AuthenticAMD") + && max_extended >= 0x80000001) { + VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx); + have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */ ++ ++ /* Some older AMD processors support a sse1 subset (Integer SSE). */ ++ have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0); + } + +- if (have_sse2 && have_sse1) { ++ /* Intel processors don't define the mmxext extension, but since it ++ is just a sse1 subset always define it when we have sse1. */ ++ if (have_sse1) ++ have_mmxext = True; ++ ++ if (have_sse2 && have_sse1 && have_mmxext) { + va = VexArchX86; +- vai.hwcaps = VEX_HWCAPS_X86_SSE1; ++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; ++ vai.hwcaps |= VEX_HWCAPS_X86_SSE1; + vai.hwcaps |= VEX_HWCAPS_X86_SSE2; + if (have_lzcnt) + vai.hwcaps |= VEX_HWCAPS_X86_LZCNT; +@@ -740,13 +750,21 @@ + return True; + } + +- if (have_sse1) { ++ if (have_sse1 && have_mmxext) { + va = VexArchX86; +- vai.hwcaps = VEX_HWCAPS_X86_SSE1; ++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; ++ vai.hwcaps |= VEX_HWCAPS_X86_SSE1; + VG_(machine_x86_have_mxcsr) = 1; + return True; + } + ++ if (have_mmxext) { ++ va = VexArchX86; ++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; ++ VG_(machine_x86_have_mxcsr) = 0; ++ return True; ++ } ++ + va = VexArchX86; + vai.hwcaps = 0; /*baseline - no sse at all*/ + VG_(machine_x86_have_mxcsr) = 0; +diff --git a/none/tests/x86/insn_mmxext.vgtest b/none/tests/x86/insn_mmxext.vgtest +index ad48b6e..e3627d6 100644 +--- a/none/tests/x86/insn_mmxext.vgtest ++++ b/none/tests/x86/insn_mmxext.vgtest +@@ -1,3 +1,4 @@ + prog: ../../../none/tests/x86/insn_mmxext +-prereq: ../../../tests/x86_amd64_features x86-mmxext ++# mmxext is an old AMD subset of sse1, so either will do. 
++prereq: ../../../tests/x86_amd64_features x86-mmxext || ../../../tests/x86_amd64_features x86-sse + vgopts: -q diff --git a/valgrind.spec b/valgrind.spec index 090dccc..b35f3d8 100644 --- a/valgrind.spec +++ b/valgrind.spec @@ -188,6 +188,9 @@ Patch48: valgrind-3.8.1-power-isa-205-deprecation.patch # KDE#310931 message-security assist instruction extension not implemented Patch49: valgrind-3.8.1-s390-STFLE.patch +# KDE#323713 Support mmxext (integer sse) subset on i386 (athlon) +Patch50: valgrind-3.8.1-mmxext.patch + %ifarch x86_64 ppc64 # Ensure glibc{,-devel} is installed for both multilib arches BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so @@ -337,6 +340,7 @@ touch ./memcheck/tests/linux/getregset.stderr.exp chmod 755 tests/check_isa-2_07_cap %patch48 -p1 %patch49 -p1 +%patch50 -p1 # These tests go into an endless loop on ARM # There is a __sync_add_and_fetch in the testcase. @@ -501,6 +505,7 @@ echo ===============END TESTING=============== * Thu Sep 05 2013 Mark Wielaard - Fix power_ISA2_05 testcase (valgrind-3.8.1-power-isa-205-deprecation.patch) - Fix ppc32 make check build (valgrind-3.8.1-initial-power-isa-207.patch) +- Add valgrind-3.8.1-mmxext.patch * Wed Aug 21 2013 Mark Wielaard - 3.8.1-26 - Allow building against glibc 2.18. (#999169)