From dfed9866524f5d66744b23f0d64c698cbaac8540 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 11 Jan 2022 12:59:54 -0500 Subject: [PATCH] import valgrind-3.18.1-6.el9 --- ...algrind-3.18.1-amd64-more-spec-rules.patch | 105 + .../valgrind-3.18.1-arm64-atomic-align.patch | 163 ++ ...valgrind-3.18.1-arm64-doubleword-cas.patch | 121 ++ .../valgrind-3.18.1-arm64-ldaxp-stlxp.patch | 1440 +++++++++++++ SOURCES/valgrind-3.18.1-condvar.patch | 284 +++ .../valgrind-3.18.1-demangle-namespace.patch | 35 + SOURCES/valgrind-3.18.1-dhat-tests-copy.patch | 20 + ...algrind-3.18.1-gdbserver_tests-hwcap.patch | 25 + SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch | 1876 +++++++++++++++++ SOURCES/valgrind-3.18.1-ppc-pstq.patch | 47 + ...algrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch | 60 + SOURCES/valgrind-3.18.1-rseq-enosys.patch | 180 ++ .../valgrind-3.18.1-rust-v0-demangle.patch | 137 ++ SOURCES/valgrind-3.18.1-s390x-EXRL.patch | 549 +++++ SOURCES/valgrind-3.18.1-s390x-vdso.patch | 28 + SPECS/valgrind.spec | 86 +- 16 files changed, 5155 insertions(+), 1 deletion(-) create mode 100644 SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-atomic-align.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch create mode 100644 SOURCES/valgrind-3.18.1-condvar.patch create mode 100644 SOURCES/valgrind-3.18.1-demangle-namespace.patch create mode 100644 SOURCES/valgrind-3.18.1-dhat-tests-copy.patch create mode 100644 SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc-pstq.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch create mode 100644 SOURCES/valgrind-3.18.1-rseq-enosys.patch create mode 100644 SOURCES/valgrind-3.18.1-rust-v0-demangle.patch create mode 100644 SOURCES/valgrind-3.18.1-s390x-EXRL.patch create mode 100644 SOURCES/valgrind-3.18.1-s390x-vdso.patch diff --git a/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch b/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch new file mode 100644 index 0000000..87794ee --- /dev/null +++ b/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch @@ -0,0 +1,105 @@ +commit 595341b150312d2407bd43304449bf39ec3e1fa8 +Author: Julian Seward +Date: Sat Nov 13 19:59:07 2021 +0100 + + amd64 front end: add more spec rules: + + S after SHRQ + Z after SHLQ + NZ after SHLQ + Z after SHLL + S after SHLL + + The lack of at least one of these was observed to cause occasional false + positives in Memcheck. + + Plus add commented-out cases so as to complete the set of 12 rules + {Z,NZ,S,NS} after {SHRQ,SHLQ,SHLL}. The commented-out ones are commented + out because I so far didn't find any use cases for them. 
+ +diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c +index 9d61e7a0f..ba71c1b62 100644 +--- a/VEX/priv/guest_amd64_helpers.c ++++ b/VEX/priv/guest_amd64_helpers.c +@@ -1823,16 +1823,26 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name, + /*---------------- SHRQ ----------------*/ + + if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) { +- /* SHRQ, then Z --> test dep1 == 0 */ ++ /* SHRQ, then Z --> test result[63:0] == 0 */ + return unop(Iop_1Uto64, + binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); + } + if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) { +- /* SHRQ, then NZ --> test dep1 != 0 */ ++ /* SHRQ, then NZ --> test result[63:0] != 0 */ + return unop(Iop_1Uto64, + binop(Iop_CmpNE64, cc_dep1, mkU64(0))); + } + ++ if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) { ++ /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */ ++ return binop(Iop_Shr64, cc_dep1, mkU8(63)); ++ } ++ // No known test case for this, hence disabled: ++ //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) { ++ // /* SHRQ, then NS --> (ULong) ~ result[63] */ ++ // vassert(0); ++ //} ++ + /*---------------- SHRL ----------------*/ + + if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) { +@@ -1881,6 +1891,52 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name, + // mkU32(0))); + //} + ++ /*---------------- SHLQ ----------------*/ ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) { ++ /* SHLQ, then Z --> test dep1 == 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); ++ } ++ if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) { ++ /* SHLQ, then NZ --> test dep1 != 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpNE64, cc_dep1, mkU64(0))); ++ } ++ ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) { ++ // /* SHLQ, then S --> (ULong)result[63] */ ++ // vassert(0); ++ //} ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) { ++ // /* SHLQ, then NS --> (ULong) ~ result[63] */ ++ // vassert(0); ++ //} ++ ++ /*---------------- SHLL ----------------*/ ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) { ++ /* SHLL, then Z --> test result[31:0] == 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1), ++ mkU32(0))); ++ } ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) { ++ // /* SHLL, then NZ --> test dep1 != 0 */ ++ // vassert(0); ++ //} ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) { ++ /* SHLL, then S --> (ULong)result[31] */ ++ return binop(Iop_And64, ++ binop(Iop_Shr64, cc_dep1, mkU8(31)), ++ mkU64(1)); ++ } ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) { ++ // /* SHLL, then NS --> (ULong) ~ result[31] */ ++ // vassert(0); ++ //} ++ + /*---------------- COPY ----------------*/ + /* This can happen, as a result of amd64 FP compares: "comisd ... ; + jbe" for example. */ diff --git a/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch b/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch new file mode 100644 index 0000000..8cce35f --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch @@ -0,0 +1,163 @@ +commit 2be719921e700a9ac9b85f470ed87cb8adf8151b +Author: Julian Seward +Date: Sat Nov 13 09:27:01 2021 +0100 + + Bug 445415 - arm64 front end: alignment checks missing for atomic instructions. 
+
+    For the arm64 front end, none of the atomic instructions have address
+    alignment checks included in their IR.  They all should.  The effect of
+    missing alignment checks in the IR is that, since this IR will in most cases
+    be translated back to atomic instructions in the back end, we will get
+    alignment traps (SIGBUS) on the host side and not on the guest side, which is
+    (very) incorrect behaviour of the simulation.
+
+
+diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
+index ee018c6a9..16a7e075f 100644
+--- a/VEX/priv/guest_arm64_toIR.c
++++ b/VEX/priv/guest_arm64_toIR.c
+@@ -4833,6 +4833,34 @@ static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
+ }
+ 
+ 
++/* Generate a SIGBUS followed by a restart of the current instruction if
++   `effective_addr` is not `align`-aligned.  This is required behaviour for
++   atomic instructions.  This assumes guest_PC_curr_instr is set correctly!
++
++   This is hardwired to generate SIGBUS because so far the only supported
++   arm64 platform (arm64-linux) does that.  Should we later need to extend it
++   to generate some other signal, use the same scheme as with
++   gen_SIGNAL_if_not_XX_aligned in guest_amd64_toIR.c. */
++static
++void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align )
++{
++   if (align == 1) {
++      return;
++   }
++   vassert(align == 16 || align == 8 || align == 4 || align == 2);
++   stmt(
++      IRStmt_Exit(
++         binop(Iop_CmpNE64,
++               binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)),
++               mkU64(0)),
++         Ijk_SigBUS,
++         IRConst_U64(guest_PC_curr_instr),
++         OFFB_PC
++      )
++   );
++}
++
++
+ /* Generate a "standard 7" name, from bitQ and size.  But also
+    allow ".1d" since that's occasionally useful. */
+ static
+@@ -6670,7 +6698,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
+ 
+       IRTemp ea = newTemp(Ity_I64);
+       assign(ea, getIReg64orSP(nn));
+-      /* FIXME generate check that ea is szB-aligned */
++      gen_SIGBUS_if_not_XX_aligned(ea, szB);
+ 
+       if (isLD && ss == BITS5(1,1,1,1,1)) {
+          IRTemp res = newTemp(ty);
+@@ -6803,7 +6831,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
+ 
+       IRTemp ea = newTemp(Ity_I64);
+       assign(ea, getIReg64orSP(nn));
+-      /* FIXME generate check that ea is 2*elemSzB-aligned */
++      gen_SIGBUS_if_not_XX_aligned(ea, fullSzB);
+ 
+       if (isLD && ss == BITS5(1,1,1,1,1)) {
+          if (abiinfo->guest__use_fallback_LLSC) {
+@@ -7044,7 +7072,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
+ 
+       IRTemp ea = newTemp(Ity_I64);
+       assign(ea, getIReg64orSP(nn));
+-      /* FIXME generate check that ea is szB-aligned */
++      gen_SIGBUS_if_not_XX_aligned(ea, szB);
+ 
+       if (isLD) {
+          IRTemp res = newTemp(ty);
+@@ -7159,6 +7187,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
+ 
+       IRTemp ea = newTemp(Ity_I64);
+       assign(ea, getIReg64orSP(nn));
++      gen_SIGBUS_if_not_XX_aligned(ea, szB);
+ 
+       // Insert barrier before loading for acquire and acquire-release variants:
+       // A and AL.
+@@ -7266,6 +7295,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
+       IRType ty = integerIRTypeOfSize(szB);
+       Bool is64 = szB == 8;
+ 
++      IRTemp ea = newTemp(Ity_I64);
++      assign(ea, getIReg64orSP(nn));
++      gen_SIGBUS_if_not_XX_aligned(ea, szB);
++
+       IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
+       IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
+ 
+       // Store the result back if LHS remains unchanged in memory.
+ IRTemp old = newTemp(ty); + stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old, +- Iend_LE, getIReg64orSP(nn), ++ Iend_LE, mkexpr(ea), + /*expdHi*/NULL, exp, + /*dataHi*/NULL, new)) ); + +@@ -7307,6 +7340,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + if ((ss & 0x1) || (tt & 0x1)) { + /* undefined; fall through */ + } else { ++ IRTemp ea = newTemp(Ity_I64); ++ assign(ea, getIReg64orSP(nn)); ++ gen_SIGBUS_if_not_XX_aligned(ea, is64 ? 16 : 8); ++ + IRExpr *expLo = getIRegOrZR(is64, ss); + IRExpr *expHi = getIRegOrZR(is64, ss + 1); + IRExpr *newLo = getIRegOrZR(is64, tt); +@@ -7318,7 +7355,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + stmt(IRStmt_MBE(Imbe_Fence)); + + stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo, +- Iend_LE, getIReg64orSP(nn), ++ Iend_LE, mkexpr(ea), + expHi, expLo, + newHi, newLo)) ); + +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index b65e27db4..39c6aaa46 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -4033,6 +4033,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; ++ case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break; + //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. */ +diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c +index 094e7e74b..82cb2d78c 100644 +--- a/VEX/priv/host_arm64_isel.c ++++ b/VEX/priv/host_arm64_isel.c +@@ -4483,6 +4483,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + case Ijk_InvalICache: + case Ijk_FlushDCache: + case Ijk_SigTRAP: ++ case Ijk_SigBUS: + case Ijk_Yield: { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, +@@ -4576,8 +4577,8 @@ static void iselNext ( ISelEnv* env, + case Ijk_InvalICache: + case Ijk_FlushDCache: + case Ijk_SigTRAP: +- case Ijk_Yield: +- { ++ case Ijk_SigBUS: ++ case Ijk_Yield: { + HReg r = iselIntExpr_R(env, next); + ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); + addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); diff --git a/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch b/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch new file mode 100644 index 0000000..7cf0bf5 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch @@ -0,0 +1,121 @@ +commit 7dbe2fed72886874f2eaf57dc07929542ae55b58 +Author: Julian Seward +Date: Fri Nov 12 10:40:48 2021 +0100 + + Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS. + + The sequence of instructions emitted by the arm64 backend for doubleword + compare-and-swap is incorrect. This could lead to incorrect simulation of the + AArch8.1 atomic instructions (CASP, at least). It also causes failures in the + upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least + when running with the fallback LL/SC implementation + (`--sim-hints=fallback-llsc`, or as autoselected at startup). In the worst + case it can cause segfaulting in the generated code, because it could jump + backwards unexpectedly far. + + The problem is the sequence emitted for ARM64in_CASP: + + * the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`. 
+
+    * using w1 to hold the success indication of the stxp instruction trashes the
+      previous value in x1.  But the value in x1 is an output of ARM64in_CASP,
+      hence one of the two output registers is corrupted.  That confuses any code
+      downstream that wants to inspect those values to find out whether or not the
+      transaction succeeded.
+
+    The fixes are to
+
+    * fix the branch offsets
+
+    * use a different register to hold the stxp success indication.  w3 is a
+      convenient choice.
+
+diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
+index 5dccc0495..5657bcab9 100644
+--- a/VEX/priv/host_arm64_defs.c
++++ b/VEX/priv/host_arm64_defs.c
+@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
+          addHRegUse(u, HRmWrite, hregARM64_X1());
+          addHRegUse(u, HRmWrite, hregARM64_X9());
+          addHRegUse(u, HRmWrite, hregARM64_X8());
++         addHRegUse(u, HRmWrite, hregARM64_X3());
+          break;
+       case ARM64in_MFence:
+          return;
+@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
+ 
+          -- always:
+          cmp x0, x8            // EB08001F
+-         bne out               // 540000E1 (b.ne #28 <out>)
++         bne out               // 540000A1
+          cmp x1, x9            // EB09003F
+-         bne out               // 540000A1 (b.ne #20 <out>)
++         bne out               // 54000061
+ 
+          -- one of:
+-         stxp w1, x6, x7, [x2] // C8211C46
+-         stxp w1, w6, w7, [x2] // 88211C46
++         stxp w3, x6, x7, [x2] // C8231C46
++         stxp w3, w6, w7, [x2] // 88231C46
+ 
+          -- always:
+-         cbnz w1, loop         // 35FFFE81 (cbnz w1, #-48 <loop>)
++         cbnz w3, loop         // 35FFFF03
+          out:
+       */
+       switch (i->ARM64in.CASP.szB) {
+@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
+          default: vassert(0);
+       }
+       *p++ = 0xEB08001F;
+-      *p++ = 0x540000E1;
+-      *p++ = 0xEB09003F;
+       *p++ = 0x540000A1;
++      *p++ = 0xEB09003F;
++      *p++ = 0x54000061;
+       switch (i->ARM64in.CASP.szB) {
+-         case 8: *p++ = 0xC8211C46; break;
+-         case 4: *p++ = 0x88211C46; break;
++         case 8: *p++ = 0xC8231C46; break;
++         case 4: *p++ = 0x88231C46; break;
+          default: vassert(0);
+       }
+-      *p++ = 0x35FFFE81;
++      *p++ = 0x35FFFF03;
+       goto done;
+    }
+    case ARM64in_MFence: {
+diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
+index f0737f2c6..01fb5708e 100644
+--- a/VEX/priv/host_arm64_defs.h
++++ b/VEX/priv/host_arm64_defs.h
+@@ -720,6 +720,7 @@ typedef
+          Int szB; /* 1, 2, 4 or 8 */
+       } StrEX;
+       /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
++         and trashes x8
+          where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
+                x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
+          Uses x8 as scratch (but that's not allocatable).
+@@ -738,7 +739,7 @@ typedef
+             -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value
+             -- attempt to store
+             stxr w8, x7, [x3]
+-            -- if store successful, x1==0, so the eor is "x1 := x5"
++            -- if store successful, x8==0
+             -- if store failed, branch back and try again.
+             cbne w8, loop
+             after:
+@@ -746,6 +747,12 @@ typedef
+       struct {
+          Int szB; /* 1, 2, 4 or 8 */
+       } CAS;
++      /* Doubleword CAS, 2 x 32 bit or 2 x 64 bit
++         x0(oldLSW),x1(oldMSW)
++            = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW)
++                   -> x6(newLSW),x7(newMSW))
++         and trashes x8, x9 and x3
++      */
+       struct {
+          Int szB; /* 4 or 8 */
+       } CASP;
diff --git a/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch b/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch
new file mode 100644
index 0000000..d118cc6
--- /dev/null
+++ b/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch
@@ -0,0 +1,1440 @@
+commit 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650
+Author: Julian Seward
+Date:   Fri Nov 12 12:13:45 2021 +0100
+
+    Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP).
+
+    This is unfortunately a big and complex patch, to implement LD{,A}XP and
+    ST{,L}XP.  These were omitted from the original AArch64 v8.0 implementation
+    for unknown reasons.
+
+    (Background) the patch is made significantly more complex because for AArch64
+    we actually have two implementations of the underlying
+    Load-Linked/Store-Conditional (LL/SC) machinery: a "primary" implementation,
+    which translates LL/SC more or less directly into IR and re-emits them at the
+    back end, and a "fallback" implementation that implements LL/SC "manually", by
+    taking advantage of the fact that V serialises thread execution, so we can
+    "implement" LL/SC by simulating a reservation using fields LLSC_* in the guest
+    state, and invalidating the reservation at every thread switch.
+
+    (Background) the fallback scheme is needed because the primary scheme is in
+    violation of the ARMv8 semantics in that it can (easily) introduce extra
+    memory references between the LL and SC, hence on some hardware causing the
+    reservation to always fail and so the simulated program to wind up looping
+    forever.
+
+    For these instructions, big picture:
+
+    * for the primary implementation, we take advantage of the fact that
+      IRStmt_LLSC allows I128 bit transactions to be represented.  Hence we bundle
+      up the two 64-bit data elements into an I128 (or vice versa) and present a
+      single I128-typed IRStmt_LLSC in the IR.  In the backend, those are
+      re-emitted as LDXP/STXP respectively.  For LL/SC on 32-bit register pairs,
+      that bundling produces a single 64-bit item, and so the existing LL/SC
+      backend machinery handles it.  The effect is that a doubleword 32-bit LL/SC
+      in the front end translates into a single 64-bit LL/SC in the back end.
+      Overall, though, the implementation is straightforward.
+
+    * for the fallback implementation, it is necessary to extend the guest state
+      field `guest_LLSC_DATA` to represent a 128-bit transaction, by splitting it
+      into _DATA_LO64 and _DATA_HI64.  Then, the implementation is an exact
+      analogue of the fallback implementation for single-word LL/SC.  It takes
+      advantage of the fact that the backend already supports 128-bit CAS, as
+      fixed in bug 445354.  As with the primary implementation, doubleword 32-bit
+      LL/SC is bundled into a single 64-bit transaction.
+
+    Detailed changes:
+
+    * new arm64 guest state fields LLSC_DATA_LO64/LLSC_DATA_HI64 to replace
+      guest_LLSC_DATA
+
+    * (ridealong fix) arm64 front end: a fix to a minor and harmless decoding bug
+      for the single-word LDX/STX case.
+ + * arm64 front end: IR generation for LD{,A}XP/ST{,L}XP: tedious and + longwinded, but per comments above, an exact(ish) analogue of the singleword + case + + * arm64 backend: new insns ARM64Instr_LdrEXP / ARM64Instr_StrEXP to wrap up 2 + x 64 exclusive loads/stores. Per comments above, there's no need to handle + the 2 x 32 case. + + * arm64 isel: translate I128-typed IRStmt_LLSC into the above two insns + + * arm64 isel: some auxiliary bits and pieces needed to handle I128 values; + this is standard doubleword isel stuff + + * arm64 isel: (ridealong fix): Ist_CAS: check for endianness of the CAS! + + * arm64 isel: (ridealong) a couple of formatting fixes + + * IR infrastructure: add support for I128 constants, done the same as V128 + constants + + * memcheck: handle shadow loads and stores for I128 values + + * testcase: memcheck/tests/atomic_incs.c: on arm64, also test 128-bit atomic + addition, to check we really have atomicity right + + * testcase: new test none/tests/arm64/ldxp_stxp.c, tests operation but not + atomicity. (Smoke test). + +diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c +index 12a1c5978..ee018c6a9 100644 +--- a/VEX/priv/guest_arm64_toIR.c ++++ b/VEX/priv/guest_arm64_toIR.c +@@ -1184,9 +1184,10 @@ static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e ) + #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) + #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) + +-#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE) +-#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR) +-#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA) ++#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE) ++#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR) ++#define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64) ++#define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64) + + + /* ---------------- Integer registers ---------------- */ +@@ -6652,7 +6653,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while() + has to do this bit) + */ +- if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) ++ if (INSN(29,24) == BITS6(0,0,1,0,0,0) + && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) + && INSN(14,10) == BITS5(1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); +@@ -6678,7 +6679,8 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + // if it faults. 
+ IRTemp loaded_data64 = newTemp(Ity_I64); + assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea)))); +- stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) )); + stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); + stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) )); + putIReg64orZR(tt, mkexpr(loaded_data64)); +@@ -6729,7 +6731,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + )); + // Fail if the data doesn't match the LL data + IRTemp llsc_data64 = newTemp(Ity_I64); +- assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64)); ++ assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); + stmt( IRStmt_Exit( + binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))), + mkexpr(llsc_data64)), +@@ -6771,6 +6773,257 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + /* else fall through */ + } + ++ /* -------------------- LD{,A}XP -------------------- */ ++ /* -------------------- ST{,L}XP -------------------- */ ++ /* 31 30 29 23 20 15 14 9 4 ++ 1 sz 001000 011 11111 0 t2 n t1 LDXP Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 011 11111 1 t2 n t1 LDAXP Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 001 s 0 t2 n t1 STXP Ws, Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 001 s 1 t2 n t1 STLXP Ws, Rt1, Rt2, [Xn|SP] ++ */ ++ /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed ++ comments about this implementation. Note the 'sz' field here is only 1 ++ bit; above, it is 2 bits, and has a different encoding. ++ */ ++ if (INSN(31,31) == 1 ++ && INSN(29,24) == BITS6(0,0,1,0,0,0) ++ && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) { ++ Bool elemIs64 = INSN(30,30) == 1; ++ Bool isLD = INSN(22,22) == 1; ++ Bool isAcqOrRel = INSN(15,15) == 1; ++ UInt ss = INSN(20,16); ++ UInt tt2 = INSN(14,10); ++ UInt nn = INSN(9,5); ++ UInt tt1 = INSN(4,0); ++ ++ UInt elemSzB = elemIs64 ? 8 : 4; ++ UInt fullSzB = 2 * elemSzB; ++ IRType elemTy = integerIRTypeOfSize(elemSzB); ++ IRType fullTy = integerIRTypeOfSize(fullSzB); ++ ++ IRTemp ea = newTemp(Ity_I64); ++ assign(ea, getIReg64orSP(nn)); ++ /* FIXME generate check that ea is 2*elemSzB-aligned */ ++ ++ if (isLD && ss == BITS5(1,1,1,1,1)) { ++ if (abiinfo->guest__use_fallback_LLSC) { ++ // Fallback implementation of LL. ++ // Do the load first so we don't update any guest state if it ++ // faults. Assumes little-endian guest. ++ if (fullTy == Ity_I64) { ++ vassert(elemSzB == 4); ++ IRTemp loaded_data64 = newTemp(Ity_I64); ++ assign(loaded_data64, loadLE(fullTy, mkexpr(ea))); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) )); ++ stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(8) )); ++ putIReg64orZR(tt1, unop(Iop_32Uto64, ++ unop(Iop_64to32, ++ mkexpr(loaded_data64)))); ++ putIReg64orZR(tt2, unop(Iop_32Uto64, ++ unop(Iop_64HIto32, ++ mkexpr(loaded_data64)))); ++ } else { ++ vassert(elemSzB == 8 && fullTy == Ity_I128); ++ IRTemp loaded_data128 = newTemp(Ity_I128); ++ // Hack: do the load as V128 rather than I128 so as to avoid ++ // having to implement I128 loads in the arm64 back end. 
++ assign(loaded_data128, unop(Iop_ReinterpV128asI128, ++ loadLE(Ity_V128, mkexpr(ea)))); ++ IRTemp loaded_data_lo64 = newTemp(Ity_I64); ++ IRTemp loaded_data_hi64 = newTemp(Ity_I64); ++ assign(loaded_data_lo64, unop(Iop_128to64, ++ mkexpr(loaded_data128))); ++ assign(loaded_data_hi64, unop(Iop_128HIto64, ++ mkexpr(loaded_data128))); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, ++ mkexpr(loaded_data_lo64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, ++ mkexpr(loaded_data_hi64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(16) )); ++ putIReg64orZR(tt1, mkexpr(loaded_data_lo64)); ++ putIReg64orZR(tt2, mkexpr(loaded_data_hi64)); ++ } ++ } else { ++ // Non-fallback implementation of LL. ++ IRTemp res = newTemp(fullTy); // I64 or I128 ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); ++ // Assuming a little-endian guest here. Rt1 goes at the lower ++ // address, so it must live in the least significant half of `res`. ++ IROp opGetLO = fullTy == Ity_I128 ? Iop_128to64 : Iop_64to32; ++ IROp opGetHI = fullTy == Ity_I128 ? Iop_128HIto64 : Iop_64HIto32; ++ putIReg64orZR(tt1, widenUto64(elemTy, unop(opGetLO, mkexpr(res)))); ++ putIReg64orZR(tt2, widenUto64(elemTy, unop(opGetHI, mkexpr(res)))); ++ } ++ if (isAcqOrRel) { ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ } ++ DIP("ld%sxp %s, %s, [%s] %s\n", ++ isAcqOrRel ? (isLD ? "a" : "l") : "", ++ nameIRegOrZR(elemSzB == 8, tt1), ++ nameIRegOrZR(elemSzB == 8, tt2), ++ nameIReg64orSP(nn), ++ abiinfo->guest__use_fallback_LLSC ++ ? "(fallback implementation)" : ""); ++ return True; ++ } ++ if (!isLD) { ++ if (isAcqOrRel) { ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ } ++ if (abiinfo->guest__use_fallback_LLSC) { ++ // Fallback implementation of SC. ++ // This is really ugly, since we don't have any way to do ++ // proper if-then-else. First, set up as if the SC failed, ++ // and jump forwards if it really has failed. ++ ++ // Continuation address ++ IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4); ++ ++ // "the SC failed". Any non-zero value means failure. ++ putIReg64orZR(ss, mkU64(1)); ++ ++ IRTemp tmp_LLsize = newTemp(Ity_I64); ++ assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64)); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction" ++ )); ++ // Fail if no or wrong-size transaction ++ vassert((fullSzB == 8 && fullTy == Ity_I64) ++ || (fullSzB == 16 && fullTy == Ity_I128)); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(fullSzB)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Fail if the address doesn't match the LL address ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(ea), ++ IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // The data to be stored. ++ IRTemp store_data = newTemp(fullTy); ++ if (fullTy == Ity_I64) { ++ assign(store_data, ++ binop(Iop_32HLto64, ++ narrowFrom64(Ity_I32, getIReg64orZR(tt2)), ++ narrowFrom64(Ity_I32, getIReg64orZR(tt1)))); ++ } else { ++ assign(store_data, ++ binop(Iop_64HLto128, ++ getIReg64orZR(tt2), getIReg64orZR(tt1))); ++ } ++ ++ if (fullTy == Ity_I64) { ++ // 64 bit (2x32 bit) path ++ // Fail if the data in memory doesn't match the data stashed by ++ // the LL. ++ IRTemp llsc_data_lo64 = newTemp(Ity_I64); ++ assign(llsc_data_lo64, ++ IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, loadLE(Ity_I64, mkexpr(ea)), ++ mkexpr(llsc_data_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Try to CAS the new value in. 
++ IRTemp old = newTemp(Ity_I64); ++ IRTemp expd = newTemp(Ity_I64); ++ assign(expd, mkexpr(llsc_data_lo64)); ++ stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old, ++ Iend_LE, mkexpr(ea), ++ /*expdHi*/NULL, mkexpr(expd), ++ /*dataHi*/NULL, mkexpr(store_data) ++ ))); ++ // Fail if the CAS failed (viz, old != expd) ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old), mkexpr(expd)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ } else { ++ // 128 bit (2x64 bit) path ++ // Fail if the data in memory doesn't match the data stashed by ++ // the LL. ++ IRTemp llsc_data_lo64 = newTemp(Ity_I64); ++ assign(llsc_data_lo64, ++ IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); ++ IRTemp llsc_data_hi64 = newTemp(Ity_I64); ++ assign(llsc_data_hi64, ++ IRExpr_Get(OFFB_LLSC_DATA_HI64, Ity_I64)); ++ IRTemp data_at_ea = newTemp(Ity_I128); ++ assign(data_at_ea, ++ unop(Iop_ReinterpV128asI128, ++ loadLE(Ity_V128, mkexpr(ea)))); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, ++ unop(Iop_128to64, mkexpr(data_at_ea)), ++ mkexpr(llsc_data_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, ++ unop(Iop_128HIto64, mkexpr(data_at_ea)), ++ mkexpr(llsc_data_hi64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Try to CAS the new value in. ++ IRTemp old_lo64 = newTemp(Ity_I64); ++ IRTemp old_hi64 = newTemp(Ity_I64); ++ IRTemp expd_lo64 = newTemp(Ity_I64); ++ IRTemp expd_hi64 = newTemp(Ity_I64); ++ IRTemp store_data_lo64 = newTemp(Ity_I64); ++ IRTemp store_data_hi64 = newTemp(Ity_I64); ++ assign(expd_lo64, mkexpr(llsc_data_lo64)); ++ assign(expd_hi64, mkexpr(llsc_data_hi64)); ++ assign(store_data_lo64, unop(Iop_128to64, mkexpr(store_data))); ++ assign(store_data_hi64, unop(Iop_128HIto64, mkexpr(store_data))); ++ stmt( IRStmt_CAS(mkIRCAS(old_hi64, old_lo64, ++ Iend_LE, mkexpr(ea), ++ mkexpr(expd_hi64), mkexpr(expd_lo64), ++ mkexpr(store_data_hi64), ++ mkexpr(store_data_lo64) ++ ))); ++ // Fail if the CAS failed (viz, old != expd) ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old_lo64), mkexpr(expd_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old_hi64), mkexpr(expd_hi64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ } ++ // Otherwise we succeeded (!) ++ putIReg64orZR(ss, mkU64(0)); ++ } else { ++ // Non-fallback implementation of SC. ++ IRTemp res = newTemp(Ity_I1); ++ IRExpr* dataLO = narrowFrom64(elemTy, getIReg64orZR(tt1)); ++ IRExpr* dataHI = narrowFrom64(elemTy, getIReg64orZR(tt2)); ++ IROp opMerge = fullTy == Ity_I128 ? Iop_64HLto128 : Iop_32HLto64; ++ IRExpr* data = binop(opMerge, dataHI, dataLO); ++ // Assuming a little-endian guest here. Rt1 goes at the lower ++ // address, so it must live in the least significant half of `data`. ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); ++ /* IR semantics: res is 1 if store succeeds, 0 if it fails. ++ Need to set rS to 1 on failure, 0 on success. */ ++ putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), ++ mkU64(1))); ++ } ++ DIP("st%sxp %s, %s, %s, [%s] %s\n", ++ isAcqOrRel ? (isLD ? "a" : "l") : "", ++ nameIRegOrZR(False, ss), ++ nameIRegOrZR(elemSzB == 8, tt1), ++ nameIRegOrZR(elemSzB == 8, tt2), ++ nameIReg64orSP(nn), ++ abiinfo->guest__use_fallback_LLSC ++ ? 
"(fallback implementation)" : ""); ++ return True; ++ } ++ /* else fall through */ ++ } ++ + /* ------------------ LDA{R,RH,RB} ------------------ */ + /* ------------------ STL{R,RH,RB} ------------------ */ + /* 31 29 23 20 14 9 4 +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index 5657bcab9..b65e27db4 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -1059,6 +1059,16 @@ ARM64Instr* ARM64Instr_StrEX ( Int szB ) { + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; + } ++ARM64Instr* ARM64Instr_LdrEXP ( void ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_LdrEXP; ++ return i; ++} ++ARM64Instr* ARM64Instr_StrEXP ( void ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_StrEXP; ++ return i; ++} + ARM64Instr* ARM64Instr_CAS ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_CAS; +@@ -1699,12 +1709,19 @@ void ppARM64Instr ( const ARM64Instr* i ) { + sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w'); + return; + } ++ case ARM64in_LdrEXP: ++ vex_printf("ldxp x2, x3, [x4]"); ++ return; ++ case ARM64in_StrEXP: ++ vex_printf("stxp w0, x2, x3, [x4]"); ++ return; + case ARM64in_CAS: { + vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB); + return; + } + case ARM64in_CASP: { +- vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB); ++ vex_printf("x0,x1 = casp(2x%dbit)(x2, x4,x5 -> x6,x7)", ++ 8 * i->ARM64in.CASP.szB); + return; + } + case ARM64in_MFence: +@@ -2253,6 +2270,17 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) + addHRegUse(u, HRmWrite, hregARM64_X0()); + addHRegUse(u, HRmRead, hregARM64_X2()); + return; ++ case ARM64in_LdrEXP: ++ addHRegUse(u, HRmRead, hregARM64_X4()); ++ addHRegUse(u, HRmWrite, hregARM64_X2()); ++ addHRegUse(u, HRmWrite, hregARM64_X3()); ++ return; ++ case ARM64in_StrEXP: ++ addHRegUse(u, HRmRead, hregARM64_X4()); ++ addHRegUse(u, HRmWrite, hregARM64_X0()); ++ addHRegUse(u, HRmRead, hregARM64_X2()); ++ addHRegUse(u, HRmRead, hregARM64_X3()); ++ return; + case ARM64in_CAS: + addHRegUse(u, HRmRead, hregARM64_X3()); + addHRegUse(u, HRmRead, hregARM64_X5()); +@@ -2571,6 +2599,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) + return; + case ARM64in_StrEX: + return; ++ case ARM64in_LdrEXP: ++ return; ++ case ARM64in_StrEXP: ++ return; + case ARM64in_CAS: + return; + case ARM64in_CASP: +@@ -4167,6 +4199,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + } + goto bad; + } ++ case ARM64in_LdrEXP: { ++ // 820C7FC8 ldxp x2, x3, [x4] ++ *p++ = 0xC87F0C82; ++ goto done; ++ } ++ case ARM64in_StrEXP: { ++ // 820C20C8 stxp w0, x2, x3, [x4] ++ *p++ = 0xC8200C82; ++ goto done; ++ } + case ARM64in_CAS: { + /* This isn't simple. For an explanation see the comment in + host_arm64_defs.h on the definition of ARM64Instr case CAS. 
+diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h +index 01fb5708e..dc686dff7 100644 +--- a/VEX/priv/host_arm64_defs.h ++++ b/VEX/priv/host_arm64_defs.h +@@ -509,8 +509,10 @@ typedef + ARM64in_AddToSP, /* move SP by small, signed constant */ + ARM64in_FromSP, /* move SP to integer register */ + ARM64in_Mul, +- ARM64in_LdrEX, +- ARM64in_StrEX, ++ ARM64in_LdrEX, /* load exclusive, single register */ ++ ARM64in_StrEX, /* store exclusive, single register */ ++ ARM64in_LdrEXP, /* load exclusive, register pair, 2x64-bit only */ ++ ARM64in_StrEXP, /* store exclusive, register pair, 2x64-bit only */ + ARM64in_CAS, + ARM64in_CASP, + ARM64in_MFence, +@@ -719,6 +721,12 @@ typedef + struct { + Int szB; /* 1, 2, 4 or 8 */ + } StrEX; ++ /* LDXP x2, x3, [x4]. This is 2x64-bit only. */ ++ struct { ++ } LdrEXP; ++ /* STXP w0, x2, x3, [x4]. This is 2x64-bit only. */ ++ struct { ++ } StrEXP; + /* x1 = CAS(x3(addr), x5(expected) -> x7(new)), + and trashes x8 + where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success, +@@ -1037,6 +1045,8 @@ extern ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR, + ARM64MulOp op ); + extern ARM64Instr* ARM64Instr_LdrEX ( Int szB ); + extern ARM64Instr* ARM64Instr_StrEX ( Int szB ); ++extern ARM64Instr* ARM64Instr_LdrEXP ( void ); ++extern ARM64Instr* ARM64Instr_StrEXP ( void ); + extern ARM64Instr* ARM64Instr_CAS ( Int szB ); + extern ARM64Instr* ARM64Instr_CASP ( Int szB ); + extern ARM64Instr* ARM64Instr_MFence ( void ); +diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c +index 4b1d8c846..094e7e74b 100644 +--- a/VEX/priv/host_arm64_isel.c ++++ b/VEX/priv/host_arm64_isel.c +@@ -196,9 +196,9 @@ static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ); + static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); + static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); + +-static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, ++static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, + ISelEnv* env, IRExpr* e ); +-static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo, ++static void iselInt128Expr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, + ISelEnv* env, IRExpr* e ); + + static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); +@@ -1759,9 +1759,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + + /* AND/OR/XOR(e1, e2) (for any e1, e2) */ + switch (e->Iex.Binop.op) { +- case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; +- case Iop_Or64: case Iop_Or32: case Iop_Or16: lop = ARM64lo_OR; goto log_binop; +- case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; ++ case Iop_And64: case Iop_And32: ++ lop = ARM64lo_AND; goto log_binop; ++ case Iop_Or64: case Iop_Or32: case Iop_Or16: ++ lop = ARM64lo_OR; goto log_binop; ++ case Iop_Xor64: case Iop_Xor32: ++ lop = ARM64lo_XOR; goto log_binop; + log_binop: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); +@@ -2013,6 +2016,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); + return rHi; /* and abandon rLo */ + } ++ case Iop_128to64: { ++ HReg rHi, rLo; ++ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); ++ return rLo; /* and abandon rHi */ ++ } + case Iop_8Sto32: case Iop_8Sto64: { + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); +@@ -2185,13 +2193,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + } + return dst; + } ++ case Iop_64HIto32: { ++ HReg dst = newVRegI(env); ++ HReg src = 
iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32), ++ ARM64sh_SHR)); ++ return dst; ++ } + case Iop_64to32: + case Iop_64to16: + case Iop_64to8: + case Iop_32to16: + /* These are no-ops. */ + return iselIntExpr_R(env, e->Iex.Unop.arg); +- + default: + break; + } +@@ -2335,6 +2349,43 @@ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, + vassert(e); + vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); + ++ /* --------- TEMP --------- */ ++ if (e->tag == Iex_RdTmp) { ++ lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp); ++ return; ++ } ++ ++ /* --------- CONST --------- */ ++ if (e->tag == Iex_Const) { ++ IRConst* c = e->Iex.Const.con; ++ vassert(c->tag == Ico_U128); ++ if (c->Ico.U128 == 0) { ++ // The only case we need to handle (so far) ++ HReg zero = newVRegI(env); ++ addInstr(env, ARM64Instr_Imm64(zero, 0)); ++ *rHi = *rLo = zero; ++ return; ++ } ++ } ++ ++ /* --------- UNARY ops --------- */ ++ if (e->tag == Iex_Unop) { ++ switch (e->Iex.Unop.op) { ++ case Iop_ReinterpV128asI128: { ++ HReg dstHi = newVRegI(env); ++ HReg dstLo = newVRegI(env); ++ HReg src = iselV128Expr(env, e->Iex.Unop.arg); ++ addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1)); ++ addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0)); ++ *rHi = dstHi; ++ *rLo = dstLo; ++ return; ++ } ++ default: ++ break; ++ } ++ } ++ + /* --------- BINARY ops --------- */ + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { +@@ -4086,6 +4137,14 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); + return; + } ++ if (ty == Ity_I128) { ++ HReg rHi, rLo, dstHi, dstLo; ++ iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); ++ lookupIRTempPair( &dstHi, &dstLo, env, tmp); ++ addInstr(env, ARM64Instr_MovI(dstHi, rHi)); ++ addInstr(env, ARM64Instr_MovI(dstLo, rLo)); ++ return; ++ } + if (ty == Ity_V128) { + HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); +@@ -4183,42 +4242,67 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + /* LL */ + IRTemp res = stmt->Ist.LLSC.result; + IRType ty = typeOfIRTemp(env->type_env, res); +- if (ty == Ity_I64 || ty == Ity_I32 ++ if (ty == Ity_I128 || ty == Ity_I64 || ty == Ity_I32 + || ty == Ity_I16 || ty == Ity_I8) { + Int szB = 0; +- HReg r_dst = lookupIRTemp(env, res); + HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (ty) { +- case Ity_I8: szB = 1; break; +- case Ity_I16: szB = 2; break; +- case Ity_I32: szB = 4; break; +- case Ity_I64: szB = 8; break; +- default: vassert(0); ++ case Ity_I8: szB = 1; break; ++ case Ity_I16: szB = 2; break; ++ case Ity_I32: szB = 4; break; ++ case Ity_I64: szB = 8; break; ++ case Ity_I128: szB = 16; break; ++ default: vassert(0); ++ } ++ if (szB == 16) { ++ HReg r_dstMSword = INVALID_HREG; ++ HReg r_dstLSword = INVALID_HREG; ++ lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); ++ addInstr(env, ARM64Instr_LdrEXP()); ++ addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2())); ++ addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3())); ++ } else { ++ vassert(szB != 0); ++ HReg r_dst = lookupIRTemp(env, res); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); ++ addInstr(env, ARM64Instr_LdrEX(szB)); ++ addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); + } +- addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); +- addInstr(env, ARM64Instr_LdrEX(szB)); +- addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); + return; 
+ } + goto stmt_fail; + } else { + /* SC */ + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); +- if (tyd == Ity_I64 || tyd == Ity_I32 ++ if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32 + || tyd == Ity_I16 || tyd == Ity_I8) { + Int szB = 0; +- HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); + HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (tyd) { +- case Ity_I8: szB = 1; break; +- case Ity_I16: szB = 2; break; +- case Ity_I32: szB = 4; break; +- case Ity_I64: szB = 8; break; +- default: vassert(0); ++ case Ity_I8: szB = 1; break; ++ case Ity_I16: szB = 2; break; ++ case Ity_I32: szB = 4; break; ++ case Ity_I64: szB = 8; break; ++ case Ity_I128: szB = 16; break; ++ default: vassert(0); ++ } ++ if (szB == 16) { ++ HReg rD_MSword = INVALID_HREG; ++ HReg rD_LSword = INVALID_HREG; ++ iselInt128Expr(&rD_MSword, ++ &rD_LSword, env, stmt->Ist.LLSC.storedata); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); ++ addInstr(env, ARM64Instr_StrEXP()); ++ } else { ++ vassert(szB != 0); ++ HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); ++ addInstr(env, ARM64Instr_StrEX(szB)); + } +- addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); +- addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); +- addInstr(env, ARM64Instr_StrEX(szB)); + } else { + goto stmt_fail; + } +@@ -4243,10 +4327,10 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + + /* --------- ACAS --------- */ + case Ist_CAS: { +- if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { ++ IRCAS* cas = stmt->Ist.CAS.details; ++ if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) { + /* "normal" singleton CAS */ + UChar sz; +- IRCAS* cas = stmt->Ist.CAS.details; + IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); + switch (ty) { + case Ity_I64: sz = 8; break; +@@ -4281,10 +4365,9 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + addInstr(env, ARM64Instr_MovI(rOld, rResult)); + return; + } +- else { ++ if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) { + /* Paired register CAS, i.e. 
CASP */ + UChar sz; +- IRCAS* cas = stmt->Ist.CAS.details; + IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); + switch (ty) { + case Ity_I64: sz = 8; break; +diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c +index 25566c41c..2d82c41a1 100644 +--- a/VEX/priv/ir_defs.c ++++ b/VEX/priv/ir_defs.c +@@ -76,6 +76,7 @@ void ppIRConst ( const IRConst* con ) + case Ico_U16: vex_printf( "0x%x:I16", (UInt)(con->Ico.U16)); break; + case Ico_U32: vex_printf( "0x%x:I32", (UInt)(con->Ico.U32)); break; + case Ico_U64: vex_printf( "0x%llx:I64", (ULong)(con->Ico.U64)); break; ++ case Ico_U128: vex_printf( "I128{0x%04x}", (UInt)(con->Ico.U128)); break; + case Ico_F32: u.f32 = con->Ico.F32; + vex_printf( "F32{0x%x}", u.i32); + break; +@@ -2266,6 +2267,13 @@ IRConst* IRConst_U64 ( ULong u64 ) + c->Ico.U64 = u64; + return c; + } ++IRConst* IRConst_U128 ( UShort con ) ++{ ++ IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst)); ++ c->tag = Ico_U128; ++ c->Ico.U128 = con; ++ return c; ++} + IRConst* IRConst_F32 ( Float f32 ) + { + IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst)); +@@ -4230,6 +4238,7 @@ IRType typeOfIRConst ( const IRConst* con ) + case Ico_U16: return Ity_I16; + case Ico_U32: return Ity_I32; + case Ico_U64: return Ity_I64; ++ case Ico_U128: return Ity_I128; + case Ico_F32: return Ity_F32; + case Ico_F32i: return Ity_F32; + case Ico_F64: return Ity_F64; +@@ -5129,7 +5138,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result); + if (stmt->Ist.LLSC.storedata == NULL) { + /* it's a LL */ +- if (tyRes != Ity_I64 && tyRes != Ity_I32 ++ if (tyRes != Ity_I128 && tyRes != Ity_I64 && tyRes != Ity_I32 + && tyRes != Ity_I16 && tyRes != Ity_I8) + sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus"); + } else { +@@ -5137,7 +5146,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + if (tyRes != Ity_I1) + sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1"); + tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata); +- if (tyData != Ity_I64 && tyData != Ity_I32 ++ if (tyData != Ity_I128 && tyData != Ity_I64 && tyData != Ity_I32 + && tyData != Ity_I16 && tyData != Ity_I8) + sanityCheckFail(bb,stmt, + "Ist.LLSC(SC).result :: storedata bogus"); +@@ -5385,6 +5394,7 @@ Int sizeofIRType ( IRType ty ) + IRType integerIRTypeOfSize ( Int szB ) + { + switch (szB) { ++ case 16: return Ity_I128; + case 8: return Ity_I64; + case 4: return Ity_I32; + case 2: return Ity_I16; +diff --git a/VEX/pub/libvex_guest_arm64.h b/VEX/pub/libvex_guest_arm64.h +index 39b6ecdc2..91d06bd75 100644 +--- a/VEX/pub/libvex_guest_arm64.h ++++ b/VEX/pub/libvex_guest_arm64.h +@@ -157,14 +157,18 @@ typedef + note of bits 23 and 22. */ + UInt guest_FPCR; + +- /* Fallback LL/SC support. See bugs 344524 and 369459. */ +- ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4 or 8. ++ /* Fallback LL/SC support. See bugs 344524 and 369459. _LO64 and _HI64 ++ contain the original contents of _ADDR+0 .. _ADDR+15, but only _SIZE ++ number of bytes of it. The remaining 16-_SIZE bytes of them must be ++ zero. */ ++ ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4,8 or 16. + ULong guest_LLSC_ADDR; // Address of transaction. +- ULong guest_LLSC_DATA; // Original value at _ADDR, zero-extended. ++ ULong guest_LLSC_DATA_LO64; // Original value at _ADDR+0. ++ ULong guest_LLSC_DATA_HI64; // Original value at _ADDR+8. 
+ + /* Padding to make it have an 16-aligned size */ + /* UInt pad_end_0; */ +- ULong pad_end_1; ++ /* ULong pad_end_1; */ + } + VexGuestARM64State; + +diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h +index deaa044c1..85805bb69 100644 +--- a/VEX/pub/libvex_ir.h ++++ b/VEX/pub/libvex_ir.h +@@ -269,6 +269,8 @@ typedef + Ico_U16, + Ico_U32, + Ico_U64, ++ Ico_U128, /* 128-bit restricted integer constant, ++ same encoding scheme as V128 */ + Ico_F32, /* 32-bit IEEE754 floating */ + Ico_F32i, /* 32-bit unsigned int to be interpreted literally + as a IEEE754 single value. */ +@@ -295,6 +297,7 @@ typedef + UShort U16; + UInt U32; + ULong U64; ++ UShort U128; + Float F32; + UInt F32i; + Double F64; +@@ -311,6 +314,7 @@ extern IRConst* IRConst_U8 ( UChar ); + extern IRConst* IRConst_U16 ( UShort ); + extern IRConst* IRConst_U32 ( UInt ); + extern IRConst* IRConst_U64 ( ULong ); ++extern IRConst* IRConst_U128 ( UShort ); + extern IRConst* IRConst_F32 ( Float ); + extern IRConst* IRConst_F32i ( UInt ); + extern IRConst* IRConst_F64 ( Double ); +diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c +index 919c7fae8..176c8e5cb 100644 +--- a/memcheck/mc_machine.c ++++ b/memcheck/mc_machine.c +@@ -1115,9 +1115,10 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB ) + if (o == GOF(CMSTART) && sz == 8) return -1; // untracked + if (o == GOF(CMLEN) && sz == 8) return -1; // untracked + +- if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked +- if (o == GOF(LLSC_ADDR) && sz == 8) return o; +- if (o == GOF(LLSC_DATA) && sz == 8) return o; ++ if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked ++ if (o == GOF(LLSC_ADDR) && sz == 8) return o; ++ if (o == GOF(LLSC_DATA_LO64) && sz == 8) return o; ++ if (o == GOF(LLSC_DATA_HI64) && sz == 8) return o; + + VG_(printf)("MC_(get_otrack_shadow_offset)(arm64)(off=%d,sz=%d)\n", + offset,szB); +diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c +index c6fd2653f..72ccb3c8c 100644 +--- a/memcheck/mc_translate.c ++++ b/memcheck/mc_translate.c +@@ -5497,8 +5497,11 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + the address (shadow) to 'defined' following the test. */ + complainIfUndefined( mce, addr, guard ); + +- /* Now cook up a call to the relevant helper function, to read the +- data V bits from shadow memory. */ ++ /* Now cook up a call to the relevant helper function, to read the data V ++ bits from shadow memory. Note that I128 loads are done by pretending ++ we're doing a V128 load, and then converting the resulting V128 vbits ++ word to an I128, right at the end of this function -- see `castedToI128` ++ below. (It's only a minor hack :-) This pertains to bug 444399. */ + ty = shadowTypeV(ty); + + void* helper = NULL; +@@ -5511,6 +5514,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + hname = "MC_(helperc_LOADV256le)"; + ret_via_outparam = True; + break; ++ case Ity_I128: // fallthrough. See comment above. + case Ity_V128: helper = &MC_(helperc_LOADV128le); + hname = "MC_(helperc_LOADV128le)"; + ret_via_outparam = True; +@@ -5576,7 +5580,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + + /* We need to have a place to park the V bits we're just about to + read. */ +- IRTemp datavbits = newTemp(mce, ty, VSh); ++ IRTemp datavbits = newTemp(mce, ty == Ity_I128 ? Ity_V128 : ty, VSh); + + /* Here's the call. 
*/ + IRDirty* di; +@@ -5603,7 +5607,14 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + } + stmt( 'V', mce, IRStmt_Dirty(di) ); + +- return mkexpr(datavbits); ++ if (ty == Ity_I128) { ++ IRAtom* castedToI128 ++ = assignNew('V', mce, Ity_I128, ++ unop(Iop_ReinterpV128asI128, mkexpr(datavbits))); ++ return castedToI128; ++ } else { ++ return mkexpr(datavbits); ++ } + } + + +@@ -5631,6 +5642,7 @@ IRAtom* expr2vbits_Load ( MCEnv* mce, + case Ity_I16: + case Ity_I32: + case Ity_I64: ++ case Ity_I128: + case Ity_V128: + case Ity_V256: + return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard); +@@ -5928,6 +5940,7 @@ void do_shadow_Store ( MCEnv* mce, + c = IRConst_V256(V_BITS32_DEFINED); break; + case Ity_V128: // V128 weirdness -- used twice + c = IRConst_V128(V_BITS16_DEFINED); break; ++ case Ity_I128: c = IRConst_U128(V_BITS16_DEFINED); break; + case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; + case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; + case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; +@@ -5948,6 +5961,7 @@ void do_shadow_Store ( MCEnv* mce, + switch (ty) { + case Ity_V256: /* we'll use the helper four times */ + case Ity_V128: /* we'll use the helper twice */ ++ case Ity_I128: /* we'll use the helper twice */ + case Ity_I64: helper = &MC_(helperc_STOREV64le); + hname = "MC_(helperc_STOREV64le)"; + break; +@@ -6051,9 +6065,9 @@ void do_shadow_Store ( MCEnv* mce, + stmt( 'V', mce, IRStmt_Dirty(diQ3) ); + + } +- else if (UNLIKELY(ty == Ity_V128)) { ++ else if (UNLIKELY(ty == Ity_V128 || ty == Ity_I128)) { + +- /* V128-bit case */ ++ /* V128/I128-bit case */ + /* See comment in next clause re 64-bit regparms */ + /* also, need to be careful about endianness */ + +@@ -6062,6 +6076,7 @@ void do_shadow_Store ( MCEnv* mce, + IRAtom *addrLo64, *addrHi64; + IRAtom *vdataLo64, *vdataHi64; + IRAtom *eBiasLo64, *eBiasHi64; ++ IROp opGetLO64, opGetHI64; + + if (end == Iend_LE) { + offLo64 = 0; +@@ -6071,9 +6086,17 @@ void do_shadow_Store ( MCEnv* mce, + offHi64 = 0; + } + ++ if (ty == Ity_V128) { ++ opGetLO64 = Iop_V128to64; ++ opGetHI64 = Iop_V128HIto64; ++ } else { ++ opGetLO64 = Iop_128to64; ++ opGetHI64 = Iop_128HIto64; ++ } ++ + eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); + addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); +- vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); ++ vdataLo64 = assignNew('V', mce, Ity_I64, unop(opGetLO64, vdata)); + diLo64 = unsafeIRDirty_0_N( + 1/*regparms*/, + hname, VG_(fnptr_to_fnentry)( helper ), +@@ -6081,7 +6104,7 @@ void do_shadow_Store ( MCEnv* mce, + ); + eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); + addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); +- vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); ++ vdataHi64 = assignNew('V', mce, Ity_I64, unop(opGetHI64, vdata)); + diHi64 = unsafeIRDirty_0_N( + 1/*regparms*/, + hname, VG_(fnptr_to_fnentry)( helper ), +@@ -6888,7 +6911,7 @@ static void do_shadow_LLSC ( MCEnv* mce, + /* Just treat this as a normal load, followed by an assignment of + the value to .result. 
*/ + /* Stay sane */ +- tl_assert(resTy == Ity_I64 || resTy == Ity_I32 ++ tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32 + || resTy == Ity_I16 || resTy == Ity_I8); + assign( 'V', mce, resTmp, + expr2vbits_Load( +@@ -6899,7 +6922,7 @@ static void do_shadow_LLSC ( MCEnv* mce, + /* Stay sane */ + IRType dataTy = typeOfIRExpr(mce->sb->tyenv, + stStoredata); +- tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 ++ tl_assert(dataTy == Ity_I128 || dataTy == Ity_I64 || dataTy == Ity_I32 + || dataTy == Ity_I16 || dataTy == Ity_I8); + do_shadow_Store( mce, stEnd, + stAddr, 0/* addr bias */, +@@ -7684,7 +7707,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) + = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); + IRExpr* vanillaLoad + = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); +- tl_assert(resTy == Ity_I64 || resTy == Ity_I32 ++ tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32 + || resTy == Ity_I16 || resTy == Ity_I8); + assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), + schemeE(mce, vanillaLoad)); +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 449710020..2b43ef7d7 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -90,6 +90,7 @@ EXTRA_DIST = \ + addressable.stderr.exp addressable.stdout.exp addressable.vgtest \ + atomic_incs.stderr.exp atomic_incs.vgtest \ + atomic_incs.stdout.exp-32bit atomic_incs.stdout.exp-64bit \ ++ atomic_incs.stdout.exp-64bit-and-128bit \ + badaddrvalue.stderr.exp \ + badaddrvalue.stdout.exp badaddrvalue.vgtest \ + exit_on_first_error.stderr.exp \ +diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c +index f931750f4..1c738c530 100644 +--- a/memcheck/tests/atomic_incs.c ++++ b/memcheck/tests/atomic_incs.c +@@ -22,6 +22,17 @@ + #define NNN 3456987 + + #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) ++#define IS_16_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 15)) ++ ++// U128 from libvex_basictypes.h is a 4-x-UInt array, which is a bit ++// inconvenient, hence: ++typedef ++ struct { ++ // assuming little-endianness ++ unsigned long long int lo64; ++ unsigned long long int hi64; ++ } ++ MyU128; + + + __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) +@@ -712,6 +723,40 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) + #endif + } + ++__attribute__((noinline)) void atomic_add_128bit ( MyU128* p, ++ unsigned long long int n ) ++{ ++#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \ ++ || defined (VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_amd64) \ ++ || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ ++ || defined(VGA_arm) \ ++ || defined(VGA_s390x) ++ /* do nothing; is not supported */ ++#elif defined(VGA_arm64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)p, (unsigned long long int)n, ++ 0xFFFFFFFFFFFFFFFFULL}; ++ do { ++ __asm__ __volatile__( ++ "mov x5, %0" "\n\t" // &block[0] ++ "ldr x9, [x5, #0]" "\n\t" // p ++ "ldr x10, [x5, #8]" "\n\t" // n ++ "ldxp x7, x8, [x9]" "\n\t" ++ "adds x7, x7, x10" "\n\t" ++ "adc x8, x8, xzr" "\n\t" ++ "stxp w4, x7, x8, [x9]" "\n\t" ++ "str x4, [x5, #16]" "\n\t" ++ : /*out*/ ++ : /*in*/ "r"(&block[0]) ++ : /*trash*/ "memory", "cc", "x5", "x7", "x8", "x9", "x10", "x4" ++ ); ++ } while (block[2] != 0); ++#else ++# error "Unsupported arch" ++#endif ++} ++ + int main ( int argc, char** argv ) + { + int i, status; +@@ -720,8 +765,12 @@ int main ( int argc, char** argv ) + short* p16; + int* p32; 
+ long long int* p64; ++ MyU128* p128; + pid_t child, p2; + ++ assert(sizeof(MyU128) == 16); ++ assert(sysconf(_SC_PAGESIZE) >= 4096); ++ + printf("parent, pre-fork\n"); + + page = mmap( 0, sysconf(_SC_PAGESIZE), +@@ -736,11 +785,13 @@ int main ( int argc, char** argv ) + p16 = (short*)(page+256); + p32 = (int*)(page+512); + p64 = (long long int*)(page+768); ++ p128 = (MyU128*)(page+1024); + + assert( IS_8_ALIGNED(p8) ); + assert( IS_8_ALIGNED(p16) ); + assert( IS_8_ALIGNED(p32) ); + assert( IS_8_ALIGNED(p64) ); ++ assert( IS_16_ALIGNED(p128) ); + + memset(page, 0, 1024); + +@@ -748,6 +799,7 @@ int main ( int argc, char** argv ) + *p16 = 0; + *p32 = 0; + *p64 = 0; ++ p128->lo64 = p128->hi64 = 0; + + child = fork(); + if (child == -1) { +@@ -763,6 +815,7 @@ int main ( int argc, char** argv ) + atomic_add_16bit(p16, 1); + atomic_add_32bit(p32, 1); + atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ ++ atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64 + } + return 1; + /* NOTREACHED */ +@@ -778,6 +831,7 @@ int main ( int argc, char** argv ) + atomic_add_16bit(p16, 1); + atomic_add_32bit(p32, 1); + atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ ++ atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64 + } + + p2 = waitpid(child, &status, 0); +@@ -788,11 +842,17 @@ int main ( int argc, char** argv ) + + printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n", + (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 ); ++ printf(" 128 bit 0x%016llx:0x%016llx\n", ++ p128->hi64, p128->lo64); + + if (-74 == (int)(*(signed char*)p8) + && 32694 == (int)(*p16) + && 6913974 == *p32 +- && (0LL == *p64 || 682858642110LL == *p64)) { ++ && (0LL == *p64 || 682858642110LL == *p64) ++ && ((0 == p128->hi64 && 0 == p128->lo64) ++ || (0x00000000000697fb == p128->hi64 ++ && 0x6007eb426316d956ULL == p128->lo64)) ++ ) { + printf("PASS\n"); + } else { + printf("FAIL -- see source code for expected values\n"); +diff --git a/memcheck/tests/atomic_incs.stdout.exp-32bit b/memcheck/tests/atomic_incs.stdout.exp-32bit +index c5b8781e5..55e5044b5 100644 +--- a/memcheck/tests/atomic_incs.stdout.exp-32bit ++++ b/memcheck/tests/atomic_incs.stdout.exp-32bit +@@ -3,5 +3,6 @@ child + parent, pre-fork + parent + FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 0 ++ 128 bit 0x0000000000000000:0x0000000000000000 + PASS + parent exits +diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit b/memcheck/tests/atomic_incs.stdout.exp-64bit +index 82405c520..ca2f4fc97 100644 +--- a/memcheck/tests/atomic_incs.stdout.exp-64bit ++++ b/memcheck/tests/atomic_incs.stdout.exp-64bit +@@ -3,5 +3,6 @@ child + parent, pre-fork + parent + FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 682858642110 ++ 128 bit 0x0000000000000000:0x0000000000000000 + PASS + parent exits +diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit +new file mode 100644 +index 000000000..ef6580917 +--- /dev/null ++++ b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit +@@ -0,0 +1,8 @@ ++parent, pre-fork ++child ++parent, pre-fork ++parent ++FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 682858642110 ++ 128 bit 0x00000000000697fb:0x6007eb426316d956 ++PASS ++parent exits +diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am +index 00cbfa52c..9efb49b27 100644 +--- a/none/tests/arm64/Makefile.am ++++ b/none/tests/arm64/Makefile.am +@@ -12,7 +12,10 @@ EXTRA_DIST = 
\
+ atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
+ simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \
+ fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \
+- fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp fp_and_simd_v82.vgtest
++ fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \
++ fp_and_simd_v82.vgtest \
++ ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \
++ ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest
+
+ check_PROGRAMS = \
+ allexec \
+@@ -20,7 +23,8 @@ check_PROGRAMS = \
+ fp_and_simd \
+ integer \
+ memory \
+- fmadd_sub
++ fmadd_sub \
++ ldxp_stxp
+
+ if BUILD_ARMV8_CRC_TESTS
+ check_PROGRAMS += crc32
+diff --git a/none/tests/arm64/ldxp_stxp.c b/none/tests/arm64/ldxp_stxp.c
+new file mode 100644
+index 000000000..b5f6ea121
+--- /dev/null
++++ b/none/tests/arm64/ldxp_stxp.c
+@@ -0,0 +1,93 @@
++
++/* Note, this is only a basic smoke test of LD{A}XP and ST{L}XP. Their
++ atomicity properties are tested by memcheck/tests/atomic_incs.c. */
++
++#include <stdio.h>
++#include <assert.h>
++#include <malloc.h>
++#include <stdlib.h>
++
++typedef unsigned int UInt;
++typedef unsigned long long int ULong;
++
++
++void initBlock ( ULong* block )
++{
++ block[0] = 0x0001020304050607ULL;
++ block[1] = 0x1011121314151617ULL;
++ block[2] = 0x2021222324252627ULL;
++ block[3] = 0x3031323334353637ULL;
++ block[4] = 0x4041424344454647ULL;
++ block[5] = 0x5051525354555657ULL;
++}
++
++void printBlock ( const char* who,
++ ULong* block, ULong rt1contents, ULong rt2contents,
++ UInt zeroIfSuccess )
++{
++ printf("Block %s (%s)\n", who, zeroIfSuccess == 0 ? "success" : "FAILURE" );
++ for (int i = 0; i < 6; i++) {
++ printf("0x%016llx\n", block[i]);
++ }
++ printf("0x%016llx rt1contents\n", rt1contents);
++ printf("0x%016llx rt2contents\n", rt2contents);
++ printf("\n");
++}
++
++int main ( void )
++{
++ ULong* block = memalign(16, 6 * sizeof(ULong));
++ assert(block);
++
++ ULong rt1in, rt2in, rt1out, rt2out;
++ UInt scRes;
++
++ // Do ldxp then stxp with x-registers
++ initBlock(block);
++ rt1in = 0x5555666677778888ULL;
++ rt2in = 0xAAAA9999BBBB0000ULL;
++ rt1out = 0x1111222233334444ULL;
++ rt2out = 0xFFFFEEEEDDDDCCCCULL;
++ scRes = 0x55555555;
++ __asm__ __volatile__(
++ "ldxp %1, %2, [%5]" "\n\t"
++ "stxp %w0, %3, %4, [%5]" "\n\t"
++ : /*OUT*/
++ "=&r"(scRes), // %0
++ "=&r"(rt1out), // %1
++ "=&r"(rt2out) // %2
++ : /*IN*/
++ "r"(rt1in), // %3
++ "r"(rt2in), // %4
++ "r"(&block[2]) // %5
++ : /*TRASH*/
++ "memory","cc"
++ );
++ printBlock("after ldxp/stxp 2x64-bit", block, rt1out, rt2out, scRes);
++
++ // Do ldxp then stxp with w-registers
++ initBlock(block);
++ rt1in = 0x5555666677778888ULL;
++ rt2in = 0xAAAA9999BBBB0000ULL;
++ rt1out = 0x1111222233334444ULL;
++ rt2out = 0xFFFFEEEEDDDDCCCCULL;
++ scRes = 0x55555555;
++ __asm__ __volatile__(
++ "ldxp %w1, %w2, [%5]" "\n\t"
++ "stxp %w0, %w3, %w4, [%5]" "\n\t"
++ : /*OUT*/
++ "=&r"(scRes), // %0
++ "=&r"(rt1out), // %1
++ "=&r"(rt2out) // %2
++ : /*IN*/
++ "r"(rt1in), // %3
++ "r"(rt2in), // %4
++ "r"(&block[2]) // %5
++ : /*TRASH*/
++ "memory","cc"
++ );
++ printBlock("after ldxp/stxp 2x32-bit", block, rt1out, rt2out, scRes);
++
++ free(block);
++ return 0;
++}
+diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp
+new file mode 100644
+index 000000000..e69de29bb
+diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp
+new file mode 100644
+index 000000000..f269ecdcc
+--- /dev/null
++++ 
b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp +@@ -0,0 +1,20 @@ ++Block after ldxp/stxp 2x64-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0x5555666677778888 ++0xaaaa9999bbbb0000 ++0x4041424344454647 ++0x5051525354555657 ++0x2021222324252627 rt1contents ++0x3031323334353637 rt2contents ++ ++Block after ldxp/stxp 2x32-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0xbbbb000077778888 ++0x3031323334353637 ++0x4041424344454647 ++0x5051525354555657 ++0x0000000024252627 rt1contents ++0x0000000020212223 rt2contents ++ +diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.vgtest b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest +new file mode 100644 +index 000000000..29133729a +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest +@@ -0,0 +1,2 @@ ++prog: ldxp_stxp ++vgopts: -q +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp +new file mode 100644 +index 000000000..f269ecdcc +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp +@@ -0,0 +1,20 @@ ++Block after ldxp/stxp 2x64-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0x5555666677778888 ++0xaaaa9999bbbb0000 ++0x4041424344454647 ++0x5051525354555657 ++0x2021222324252627 rt1contents ++0x3031323334353637 rt2contents ++ ++Block after ldxp/stxp 2x32-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0xbbbb000077778888 ++0x3031323334353637 ++0x4041424344454647 ++0x5051525354555657 ++0x0000000024252627 rt1contents ++0x0000000020212223 rt2contents ++ +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest +new file mode 100644 +index 000000000..474282a03 +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest +@@ -0,0 +1,2 @@ ++prog: ldxp_stxp ++vgopts: -q --sim-hints=fallback-llsc + +commit 0d38ca5dd6b446c70738031132d41f09de0f7a8a +Author: Julian Seward +Date: Fri Nov 12 13:08:45 2021 +0100 + + Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP). FOLLOWUP FIX. + + This is an attempt to un-break 'make dist', as broken by the main commit for + this bug, which was 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650. 
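For reference, the tests above lean on the standard load-exclusive/store-exclusive
retry protocol: LDXP marks a 16-byte location as exclusively loaded, and the paired
STXP writes a status value (0 on success, 1 on failure) that callers loop on; that
status is exactly the scRes/block[2] value the tests print. A minimal standalone
sketch of the idiom follows, assuming GCC inline asm on AArch64; the helper name
atomic_read_128 is illustrative only and not part of the patch.

#include <stdio.h>

typedef unsigned long long ULong;

/* Atomically snapshot a 16-byte location as two 64-bit halves.  If any
   other observer touches the location between the LDXP and the STXP,
   the STXP fails (writes 1 to 'fail') and the loop retries, so a
   successful pass proves the two loads formed one atomic read. */
static void atomic_read_128 ( ULong* p, ULong* lo, ULong* hi )
{
   unsigned int fail;
   do {
      __asm__ __volatile__(
         "ldxp %0, %1, [%3]"      "\n\t"   // load-exclusive pair
         "stxp %w2, %0, %1, [%3]" "\n\t"   // store back the same values
         : "=&r"(*lo), "=&r"(*hi), "=&r"(fail)
         : "r"(p)
         : "memory");
   } while (fail != 0);                    // 0 means exclusivity held

}

int main ( void )
{
   static ULong pair[2] __attribute__((aligned(16)))
      = { 0x1122334455667788ULL, 0x99aabbccddeeff00ULL };
   ULong lo, hi;
   atomic_read_128(pair, &lo, &hi);
   printf("lo=0x%016llx hi=0x%016llx\n", lo, hi);
   return 0;
}

The two .vgtest files below run this same binary twice, once under Valgrind's
native LL/SC emulation and once with --sim-hints=fallback-llsc.
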
+
+diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
+index 9efb49b27..4a06f0996 100644
+--- a/none/tests/arm64/Makefile.am
++++ b/none/tests/arm64/Makefile.am
+@@ -14,8 +14,10 @@ EXTRA_DIST = \
+ fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \
+ fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \
+ fp_and_simd_v82.vgtest \
+- ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \
+- ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest
++ ldxp_stxp_basisimpl.stdout.exp ldxp_stxp_basisimpl.stderr.exp \
++ ldxp_stxp_basisimpl.vgtest \
++ ldxp_stxp_fallbackimpl.stdout.exp ldxp_stxp_fallbackimpl.stderr.exp \
++ ldxp_stxp_fallbackimpl.vgtest
+
+ check_PROGRAMS = \
+ allexec \
diff --git a/SOURCES/valgrind-3.18.1-condvar.patch b/SOURCES/valgrind-3.18.1-condvar.patch
new file mode 100644
index 0000000..e129326
--- /dev/null
+++ b/SOURCES/valgrind-3.18.1-condvar.patch
@@ -0,0 +1,284 @@
+commit 9abfed23c0d430aafb85de6397d171316c982792
+Author: Paul Floyd
+Date: Fri Nov 19 08:34:53 2021 +0100
+
+ Bug 445504 Using C++ condition_variable results in bogus "mutex is locked simultaneously by two threads" warning
+
+ Add intercepts for pthread_cond_clockwait to DRD and Helgrind
+ Also testcase from bugzilla done by Bart, with configure check
+
+diff --git a/configure.ac b/configure.ac
+index e7381f205..cb836dbff 100755
+--- a/configure.ac
++++ b/configure.ac
+@@ -1989,6 +1989,27 @@ AC_LANG(C)
+
+ AM_CONDITIONAL(CXX_CAN_INCLUDE_THREAD_HEADER, test x$ac_cxx_can_include_thread_header = xyes)
+
++# Check whether compiler can process #include <condition_variable> without errors
++
++AC_MSG_CHECKING([that C++ compiler can include <condition_variable> header file])
++AC_LANG(C++)
++safe_CXXFLAGS=$CXXFLAGS
++CXXFLAGS=-std=c++0x
++
++AC_COMPILE_IFELSE([AC_LANG_SOURCE([
++#include <condition_variable>
++])],
++[
++ac_cxx_can_include_condition_variable_header=yes
++AC_MSG_RESULT([yes])
++], [
++ac_cxx_can_include_condition_variable_header=no
++AC_MSG_RESULT([no])
++])
++CXXFLAGS=$safe_CXXFLAGS
++AC_LANG(C)
++
++AM_CONDITIONAL(CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER, test x$ac_cxx_can_include_condition_variable_header = xyes)
+
+ # On aarch64 before glibc 2.20 we would get the kernel user_pt_regs instead
+ # of the user_regs_struct from sys/user.h. They are structurally the same
+diff --git a/drd/drd_pthread_intercepts.c b/drd/drd_pthread_intercepts.c
+index 8b4454364..95127b42c 100644
+--- a/drd/drd_pthread_intercepts.c
++++ b/drd/drd_pthread_intercepts.c
+@@ -1175,6 +1175,30 @@ PTH_FUNCS(int, condZureltimedwait, pthread_cond_timedwait_intercept,
+ (cond, mutex, timeout));
+ #endif /* VGO_solaris */
+
++
++static __always_inline
++int pthread_cond_clockwait_intercept(pthread_cond_t *cond,
++ pthread_mutex_t *mutex,
++ clockid_t clockid,
++ const struct timespec* abstime)
++{
++ int ret;
++ OrigFn fn;
++ VALGRIND_GET_ORIG_FN(fn);
++ VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PRE_COND_WAIT,
++ cond, mutex, DRD_(mutex_type)(mutex), 0, 0);
++ CALL_FN_W_WWWW(ret, fn, cond, mutex, clockid, abstime);
++ VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__POST_COND_WAIT,
++ cond, mutex, 1, 0, 0);
++ return ret;
++}
++
++PTH_FUNCS(int, pthreadZucondZuclockwait, pthread_cond_clockwait_intercept,
++ (pthread_cond_t *cond, pthread_mutex_t *mutex,
++ clockid_t clockid, const struct timespec* abstime),
++ (cond, mutex, clockid, abstime));
++
++
+ // NOTE: be careful to intercept only pthread_cond_signal() and not Darwin's
+ // pthread_cond_signal_thread_np(). The former accepts one argument; the latter
+ // two. 
Intercepting all pthread_cond_signal* functions will cause only one
+diff --git a/drd/tests/Makefile.am b/drd/tests/Makefile.am
+index 4cb2f7f84..c804391e8 100755
+--- a/drd/tests/Makefile.am
++++ b/drd/tests/Makefile.am
+@@ -105,6 +105,8 @@ EXTRA_DIST = \
+ circular_buffer.vgtest \
+ concurrent_close.stderr.exp \
+ concurrent_close.vgtest \
++ condvar.stderr.exp \
++ condvar.vgtest \
+ custom_alloc.stderr.exp \
+ custom_alloc.vgtest \
+ custom_alloc_fiw.stderr.exp \
+@@ -458,6 +460,11 @@ check_PROGRAMS += \
+ endif
+ endif
+
++if CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER
++check_PROGRAMS += \
++ condvar
++endif
++
+ if HAVE_OPENMP
+ check_PROGRAMS += omp_matinv omp_prime omp_printf
+ endif
+@@ -502,6 +509,8 @@ LDADD = -lpthread
+
+
+ bug322621_SOURCES = bug322621.cpp
++condvar_SOURCES = condvar.cpp
++condvar_CXXFLAGS = $(AM_CXXFLAGS) -std=c++0x
+ concurrent_close_SOURCES = concurrent_close.cpp
+ if !VGCONF_OS_IS_FREEBSD
+ dlopen_main_LDADD = -ldl
+diff --git a/drd/tests/condvar.cpp b/drd/tests/condvar.cpp
+new file mode 100644
+index 000000000..18ecb3f8a
+--- /dev/null
++++ b/drd/tests/condvar.cpp
+@@ -0,0 +1,55 @@
++/* See also https://bugs.kde.org/show_bug.cgi?id=445504 */
++
++#include <chrono>
++#include <condition_variable>
++#include <future>
++#include <iostream>
++#include <mutex>
++#include <vector>
++
++using lock_guard = std::lock_guard<std::mutex>;
++using unique_lock = std::unique_lock<std::mutex>;
++
++struct state {
++ std::mutex m;
++ std::vector<int> v;
++ std::condition_variable cv;
++
++ state() {
++ // Call pthread_cond_init() explicitly to let DRD know about 'cv'.
++ pthread_cond_init(cv.native_handle(), NULL);
++ }
++};
++
++void other_thread(state *sp) {
++ state &s = *sp;
++ std::cerr << "Other thread: waiting for notify\n";
++ unique_lock l{s.m};
++ while (true) {
++ if (s.cv.wait_for(l, std::chrono::seconds(3)) !=
++ std::cv_status::timeout) {
++ std::cerr << "Other thread: notified\n";
++ break;
++ }
++ }
++ return;
++}
++
++
++int main() {
++ state s;
++ auto future = std::async(std::launch::async, other_thread, &s);
++
++ if (future.wait_for(std::chrono::seconds(1)) != std::future_status::timeout) {
++ std::cerr << "Main: other thread returned too early!\n";
++ return 2;
++ }
++
++ {
++ std::lock_guard<std::mutex> g{s.m};
++ s.v.push_back(1);
++ s.v.push_back(2);
++ s.cv.notify_all();
++ }
++ return 0;
++}
+diff --git a/drd/tests/condvar.stderr.exp b/drd/tests/condvar.stderr.exp
+new file mode 100644
+index 000000000..be1de9f97
+--- /dev/null
++++ b/drd/tests/condvar.stderr.exp
+@@ -0,0 +1,5 @@
++
++Other thread: waiting for notify
++Other thread: notified
++
++ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
+diff --git a/drd/tests/condvar.vgtest b/drd/tests/condvar.vgtest
+new file mode 100644
+index 000000000..2e7d49f5a
+--- /dev/null
++++ b/drd/tests/condvar.vgtest
+@@ -0,0 +1,3 @@
++prereq: ./supported_libpthread && [ -e condvar ]
++vgopts: --check-stack-var=yes --read-var-info=yes
++prog: condvar
+diff --git a/helgrind/hg_intercepts.c b/helgrind/hg_intercepts.c
+index 866efdbaa..49c3ddcd9 100644
+--- a/helgrind/hg_intercepts.c
++++ b/helgrind/hg_intercepts.c
+@@ -1409,6 +1409,88 @@ static int pthread_cond_timedwait_WRK(pthread_cond_t* cond,
+ # error "Unsupported OS"
+ #endif
+
++//-----------------------------------------------------------
++// glibc: pthread_cond_clockwait
++//
++__attribute__((noinline))
++static int pthread_cond_clockwait_WRK(pthread_cond_t* cond,
++ pthread_mutex_t* mutex,
++ clockid_t clockid,
++ struct timespec* abstime,
++ int timeout_error)
++{
++ int ret;
++ OrigFn fn;
++ unsigned long mutex_is_valid;
++ Bool 
abstime_is_valid; ++ VALGRIND_GET_ORIG_FN(fn); ++ ++ if (TRACE_PTH_FNS) { ++ fprintf(stderr, "<< pthread_cond_clockwait %p %p %p", ++ cond, mutex, abstime); ++ fflush(stderr); ++ } ++ ++ /* Tell the tool a cond-wait is about to happen, so it can check ++ for bogus argument values. In return it tells us whether it ++ thinks the mutex is valid or not. */ ++ DO_CREQ_W_WW(mutex_is_valid, ++ _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, ++ pthread_cond_t*,cond, pthread_mutex_t*,mutex); ++ assert(mutex_is_valid == 1 || mutex_is_valid == 0); ++ ++ abstime_is_valid = abstime->tv_nsec >= 0 && abstime->tv_nsec < 1000000000; ++ ++ /* Tell the tool we're about to drop the mutex. This reflects the ++ fact that in a cond_wait, we show up holding the mutex, and the ++ call atomically drops the mutex and waits for the cv to be ++ signalled. */ ++ if (mutex_is_valid && abstime_is_valid) { ++ DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, ++ pthread_mutex_t*,mutex); ++ } ++ ++ CALL_FN_W_WWWW(ret, fn, cond,mutex,clockid,abstime); ++ ++ if (mutex_is_valid && !abstime_is_valid && ret != EINVAL) { ++ DO_PthAPIerror("Bug in libpthread: pthread_cond_clockwait " ++ "invalid abstime did not cause" ++ " EINVAL", ret); ++ } ++ ++ if (mutex_is_valid && abstime_is_valid) { ++ /* and now we have the mutex again if (ret == 0 || ret == timeout) */ ++ DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, ++ pthread_mutex_t *, mutex, ++ long, (ret == 0 || ret == timeout_error) ? True : False); ++ } ++ ++ DO_CREQ_v_WWWW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, ++ pthread_cond_t*,cond, pthread_mutex_t*,mutex, ++ long,ret == timeout_error, ++ long, (ret == 0 || ret == timeout_error) && mutex_is_valid ++ ? True : False); ++ ++ if (ret != 0 && ret != timeout_error) { ++ DO_PthAPIerror( "pthread_cond_clockwait", ret ); ++ } ++ ++ if (TRACE_PTH_FNS) { ++ fprintf(stderr, " cotimedwait -> %d >>\n", ret); ++ } ++ ++ return ret; ++} ++ ++#if defined(VGO_linux) ++ PTH_FUNC(int, pthreadZucondZuclockwait, // pthread_cond_clockwait ++ pthread_cond_t* cond, pthread_mutex_t* mutex, ++ clockid_t clockid, ++ struct timespec* abstime) { ++ return pthread_cond_clockwait_WRK(cond, mutex, clockid, abstime, ETIMEDOUT); ++ } ++#endif ++ + + //----------------------------------------------------------- + // glibc: pthread_cond_signal@GLIBC_2.0 diff --git a/SOURCES/valgrind-3.18.1-demangle-namespace.patch b/SOURCES/valgrind-3.18.1-demangle-namespace.patch new file mode 100644 index 0000000..25ddf92 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-demangle-namespace.patch @@ -0,0 +1,35 @@ +commit 542447d4708d4418a08e678dcf467af92b90b7ad +Author: Mark Wielaard +Date: Mon Nov 22 13:07:59 2021 +0100 + + readdwarf3.c (parse_inl_DIE) inlined_subroutine can appear in namespaces + + This was broken by commit 75e3ef0f3 "readdwarf3: Skip units without + addresses when looking for inlined functions". Specifically by this + part: "Also use skip_DIE instead of read_DIE when not parsing + (skipping) children" + + rustc puts concrete function instances in namespaces (which is + allowed in DWARF since there is no strict separation between type + declarations and program scope entries in a DIE tree), the inline + parser didn't expect this and so skipped any DIE under a namespace + entry. This wasn't an issue before because "skipping" a DIE tree was + done by reading it, so it wasn't actually skipped. But now that we + really skip the DIE (sub)tree (which is faster than actually parsing + it) some entries were missed in the rustc case. 
+ + https://bugs.kde.org/show_bug.cgi?id=445668 + +diff --git a/coregrind/m_debuginfo/readdwarf3.c b/coregrind/m_debuginfo/readdwarf3.c +index 18eecea9f..5489f8d13 100644 +--- a/coregrind/m_debuginfo/readdwarf3.c ++++ b/coregrind/m_debuginfo/readdwarf3.c +@@ -3358,7 +3358,7 @@ static Bool parse_inl_DIE ( + // might maybe contain a DW_TAG_inlined_subroutine: + Bool ret = (unit_has_addrs + || dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram +- || dtag == DW_TAG_inlined_subroutine); ++ || dtag == DW_TAG_inlined_subroutine || dtag == DW_TAG_namespace); + return ret; + + bad_DIE: diff --git a/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch b/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch new file mode 100644 index 0000000..8e183b9 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch @@ -0,0 +1,20 @@ +commit 33aba8eef68b1745d3de96b609ff8296b70d9a1c +Author: Paul Floyd +Date: Wed Oct 27 21:37:00 2021 +0200 + + Bug 444495 - dhat/tests/copy fails on s390x + + Add -fno-builtin to ensure that the copy functions get called and so dhat + can intercept and count them. + +diff --git a/dhat/tests/Makefile.am b/dhat/tests/Makefile.am +index 86a9b6d64..b86fc416d 100644 +--- a/dhat/tests/Makefile.am ++++ b/dhat/tests/Makefile.am +@@ -29,3 +29,6 @@ AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) + # We don't care about uninitialized or unused malloc results + basic_CFLAGS = $(AM_CFLAGS) -Wno-uninitialized + big_CFLAGS = $(AM_CFLAGS) -Wno-unused-result ++ ++# Prevent the copying functions from being inlined ++copy_CFLAGS = $(AM_CFLAGS) -fno-builtin diff --git a/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch b/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch new file mode 100644 index 0000000..2d952cd --- /dev/null +++ b/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch @@ -0,0 +1,25 @@ +commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec +Author: Mark Wielaard +Date: Tue Nov 2 14:27:45 2021 +0100 + + gdbserver_tests: Filter out glibc hwcaps libc.so + + On some systems the gdbserver_tests would fail because the filter + for the optimized hwcaps subdir didn't match because the file is + called slightly differently, with the version number before .so + instead of after. For example: /lib64/glibc-hwcaps/power9/libc-2.28.so + + Add one extra filter for this pattern. + +diff --git a/gdbserver_tests/filter_gdb.in b/gdbserver_tests/filter_gdb.in +index d0c94f3f1..b753e0168 100755 +--- a/gdbserver_tests/filter_gdb.in ++++ b/gdbserver_tests/filter_gdb.in +@@ -134,6 +134,7 @@ s/in \(.__\)\{0,1\}select () from \/.*$/in syscall .../ + /^ from \/lib\/libc.so.*$/d + /^ from \/lib64\/libc.so.*$/d + /^ from \/lib64\/.*\/libc.so.*$/d ++/^ from \/lib64\/.*\/libc-.*.so/d + + # and yet another (gdb 7.0 way) to get a system call + s/in select ()$/in syscall .../ diff --git a/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch b/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch new file mode 100644 index 0000000..58498f2 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch @@ -0,0 +1,1876 @@ +commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c +Author: Carl Love +Date: Mon Nov 1 11:18:32 2021 -0500 + + Valgrind Add powerpc R=1 tests + + Contributed by Will Schmidt + + This includes updates and adjustments as suggested by Carl. + + Add tests that exercise PCRelative instructions. + These instructions are encoded with R==1, which indicate that + the memory accessed by the instruction is at a location + relative to the currently executing instruction. 
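As background for the changes described below, the effect of R==1 is seen most
directly with paddi, which the new tests exercise: with R=1 (RA is required to
be 0) the instruction computes RT = CIA + SI, i.e. the displacement is added to
the current instruction address rather than to a base register. A minimal
sketch, assuming GCC targeting POWER10/ISA 3.1 (-mcpu=power10); the helper name
is illustrative only and not part of the patch.

#include <stdio.h>

/* Returns the address of the paddi instruction itself: with R=1 and
   SI=0, RT = CIA + 0.  A nonzero SI would address data placed at a
   fixed offset from the code, which is what the R1 tests rely on. */
static unsigned long current_instruction_address ( void )
{
   unsigned long rt;
   __asm__ __volatile__ ("paddi %0, 0, 0, 1" : "=r" (rt));
   return rt;
}

int main ( void )
{
   printf("executing near 0x%lx\n", current_instruction_address());
   return 0;
}

This is why the tests below pin the .text and .bss sections at fixed offsets:
only then do the pc-relative displacements land on a known target buffer.
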
+
+ These tests are built using -Wl,-Ttext and -Wl,-Tbss
+ options to ensure the location of the target array is at a
+ location with a specific offset from the currently
+ executing instruction.
+
+ The write instructions are aimed at a large buffer in
+ the bss section, which is checked for updates at the
+ completion of each test.
+
+ In order to ensure consistent output across assorted
+ systems, the tests have been padded with ori, nop instructions
+ and align directives.
+
+ Detailed changes:
+ * Makefile.am: Add test_isa_3_1_R1_RT and test_isa_3_1_R1_XT tests.
+ * isa_3_1_helpers.h: Add identify_instruction_by_func_name() helper function
+ to indicate if the test is for R==1.
+ Add helpers to initialize and print changes to the pcrelative_write_target
+ array.
+ Add #define to help pad code with a series of eyecatcher ORI instructions.
+ * test_isa_3_1_R1_RT.c: New test.
+ * test_isa_3_1_R1_XT.c: New test.
+ * test_isa_3_1_R1_RT.stdout.exp: New expected output.
+ * test_isa_3_1_R1_XT.stdout.exp: New expected output.
+ * test_isa_3_1_R1_RT.stderr.exp: New expected output.
+ * test_isa_3_1_R1_XT.stderr.exp: New expected output.
+
+ * test_isa_3_1_R1_RT.vgtest: New test handler.
+ * test_isa_3_1_R1_XT.vgtest: New test handler.
+
+ * test_isa_3_1_common.c: Add indicators (updates_byte,updates_halfword,
+ updates_word) indicators to control the output from the R==1 tests.
+ Add helper check for "_R1" to indicate if instruction is coded with R==1.
+ Add init and print helpers for the pcrelative_write_target array.
+
+diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am
+index b709f3ef4..f8eab9fc0 100644
+--- a/none/tests/ppc64/Makefile.am
++++ b/none/tests/ppc64/Makefile.am
+@@ -61,6 +61,8 @@ EXTRA_DIST = \
+ test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp \
+ test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp \
+ test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp \
++ test_isa_3_1_R1_RT.vgtest test_isa_3_1_R1_RT.stderr.exp test_isa_3_1_R1_RT.stdout.exp \
++ test_isa_3_1_R1_XT.vgtest test_isa_3_1_R1_XT.stderr.exp test_isa_3_1_R1_XT.stdout.exp \
+ subnormal_test.stderr.exp subnormal_test.stdout.exp \
+ subnormal_test.vgtest test_darn_inst.stderr.exp \
+ test_darn_inst.stdout.exp test_darn_inst.vgtest \
+@@ -68,8 +70,8 @@ EXTRA_DIST = \
+ test_copy_paste.stderr.exp test_copy_paste.stdout.exp \
+ test_copy_paste.vgtest \
+ test_mcrxrx.vgtest test_mcrxrx.stderr.exp test_mcrxrx.stdout.exp \
+- test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp test_lxvx_stxvx.stdout.exp-p8 test_lxvx_stxvx.stdout.exp-p9
+-
++ test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp \
++ test_lxvx_stxvx.stdout.exp-p8 test_lxvx_stxvx.stdout.exp-p9
+
+ check_PROGRAMS = \
+ allexec \
+@@ -80,11 +82,12 @@ check_PROGRAMS = \
+ test_isa_3_0 test_mod_instructions \
+ test_isa_3_1_RT test_isa_3_1_XT test_isa_3_1_VRT \
+ test_isa_3_1_Misc test_isa_3_1_AT \
++ test_isa_3_1_R1_RT test_isa_3_1_R1_XT \
+ subnormal_test test_darn_inst test_copy_paste \
+ test_tm test_touch_tm data-cache-instructions \
+ std_reg_imm \
+ twi_tdi tw_td power6_bcmp scv_test \
+- test_mcrxrx test_lxvx_stxvx
++ test_mcrxrx test_lxvx_stxvx
+
+ # lmw, stmw, lswi, lswx, stswi, stswx compile (and run) only on big endian. 
+ if VGCONF_PLATFORMS_INCLUDE_PPC64BE_LINUX +@@ -106,6 +109,8 @@ test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c + test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c + test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c + test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c ++test_isa_3_1_R1_XT_SOURCES = test_isa_3_1_R1_XT.c test_isa_3_1_common.c ++test_isa_3_1_R1_RT_SOURCES = test_isa_3_1_R1_RT.c test_isa_3_1_common.c + test_darn_inst_SOURCES = test_darn_inst.c + + if HAS_ALTIVEC +@@ -224,6 +229,11 @@ test_isa_3_1_VRT_CFLAGS = $(test_isa_3_1_CFLAGS) + test_isa_3_1_Misc_CFLAGS = $(test_isa_3_1_CFLAGS) + test_isa_3_1_AT_CFLAGS = $(test_isa_3_1_CFLAGS) + ++# The _R1_foo tests exercise pc-relative instructions, so require the bss and text sections ++# exist at known offsets with respect to each other. ++test_isa_3_1_R1_RT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000 ++test_isa_3_1_R1_XT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000 ++ + subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \ + @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06) + +diff --git a/none/tests/ppc64/isa_3_1_helpers.h b/none/tests/ppc64/isa_3_1_helpers.h +index 338f55526..716a6277b 100644 +--- a/none/tests/ppc64/isa_3_1_helpers.h ++++ b/none/tests/ppc64/isa_3_1_helpers.h +@@ -43,6 +43,9 @@ extern void debug_show_current_iteration(); + extern void debug_dump_buffer(); + + extern void identify_form_components(const char *, const char *); ++extern void identify_instruction_by_func_name(const char *); ++extern void init_pcrelative_write_target(); ++extern void print_pcrelative_write_target(); + extern void dump_vsxargs(); + extern void generic_prologue(); + extern void build_args_table(); +@@ -58,6 +61,21 @@ extern void initialize_source_registers(); + extern void set_up_iterators(); + extern void initialize_buffer(int); + ++/* This (TEXT_BSS_DELTA) is the relative distance between those ++ sections as set by the linker options for the R==1 tests. */ ++#define TEXT_BSS_DELTA 0x20000 ++#define RELOC_BUFFER_SIZE 0x1000 ++extern unsigned long long pcrelative_buff_addr(int); ++#define PAD_ORI \ ++ __asm__ __volatile__ ("ori 21,21,21"); \ ++ __asm__ __volatile__ ("ori 22,22,22");\ ++ __asm__ __volatile__ ("ori 23,23,23");\ ++ __asm__ __volatile__ ("ori 24,24,24");\ ++ __asm__ __volatile__ ("ori 25,25,25");\ ++ __asm__ __volatile__ ("ori 26,26,26");\ ++ __asm__ __volatile__ ("ori 27,27,27");\ ++ __asm__ __volatile__ ("ori 28,28,28"); ++ + extern int verbose; + #define debug_printf(X) if (verbose>0) printf(X); + #define debug_show_labels (verbose>0) +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.c b/none/tests/ppc64/test_isa_3_1_R1_RT.c +new file mode 100644 +index 000000000..d73b84b10 +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.c +@@ -0,0 +1,624 @@ ++/* ++ * Valgrind testcase for PowerPC ISA 3.1 ++ * ++ * Copyright (C) 2019-2020 Will Schmidt ++ * ++ * 64bit build: ++ * gcc -Winline -Wall -g -O -mregnames -maltivec -m64 ++ */ ++ ++/* ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#ifdef HAS_ISA_3_1 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Condition Register fields. ++ These are used to capture the condition register values immediately after ++ the instruction under test is executed. This is done to help prevent other ++ test overhead (switch statements, result compares, etc) from disturbing ++ the test case results. */ ++unsigned long current_cr; ++unsigned long current_fpscr; ++ ++struct test_list_t current_test; ++ ++#include "isa_3_1_helpers.h" ++ ++static void test_plxvp_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +0(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +8(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +16(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off24_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +24(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +32(0),1" ); ++ PAD_ORI ++} ++static void test_plbz_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plhz_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plha_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, 
+64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plwz_off0_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off8_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off16_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +16(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plwz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plwa_off0_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off8_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off16_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +16(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwa %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plwa_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwa %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pld_off0_R1 (void) { ++ __asm__ __volatile__ ("pld %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_pld_off8_R1 (void) { ++ __asm__ __volatile__ ("pld %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_pld_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_pld_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_pld_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pstb_off0_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off8_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off16_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off32_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off0_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off8_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off16_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off32_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off0_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off8_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off16_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off32_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off0_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off8_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off16_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void 
test_pstd_off32_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++ /* For the paddi tests; although we can get close to a read/write target ++ due to forcing where the .text and .bss sections are placed, there is ++ still enough codegen variability that having a raw value in the exp ++ file will not be determinative for these instructions. ++ Thus, compromise and just ensure that the generated value is an ++ address that lands within the reloc buffer, and use quasi magic ++ eyecatcher values in the return to indicate success. */ ++static void test_paddi_0_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+0, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0000; ++} ++static void test_paddi_12_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+12, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0012; ++} ++static void test_paddi_48_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+48, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0048; ++} ++static void test_paddi_98_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+98, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0098; ++} ++static void test_plq_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +0(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +8(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +16(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +32(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off48_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +48(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +64(0), 1" ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pstq_off0_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+0(0), 1" ); ++} ++static void test_pstq_off8_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+8(0), 1" ); ++} ++static void test_pstq_off16_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+16(0), 1" ); ++} ++static void test_pstq_off32_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+32(0), 1" ); ++} ++static void test_pstq_off64_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+64(0), 1" ); ++} ++ ++static test_list_t testgroup_generic[] = { ++ { &test_paddi_0_R1, "paddi 0_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_12_R1, "paddi 12_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_48_R1, "paddi 48_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_98_R1, "paddi 98_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_plbz_off0_R1, "plbz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off8_R1, "plbz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off16_R1, "plbz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off32_R1, "plbz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off64_R1, "plbz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off0_R1, "pld off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off8_R1, "pld off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { 
&test_pld_off16_R1, "pld off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off32_R1, "pld off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off64_R1, "pld off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off0_R1, "plha off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off8_R1, "plha off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off16_R1, "plha off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off32_R1, "plha off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off64_R1, "plha off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off0_R1, "plhz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off8_R1, "plhz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off16_R1, "plhz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off32_R1, "plhz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off64_R1, "plhz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plq_off0_R1, "plq off0_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off8_R1, "plq off8_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off16_R1, "plq off16_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off32_R1, "plq off32_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off48_R1, "plq off48_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off64_R1, "plq off64_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off0_R1, "plwa off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off8_R1, "plwa off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off16_R1, "plwa off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off32_R1, "plwa off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off64_R1, "plwa off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off0_R1, "plwz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off8_R1, "plwz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off16_R1, "plwz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off32_R1, "plwz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off64_R1, "plwz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off0_R1, "plxvp off0_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off8_R1, "plxvp off8_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off16_R1, "plxvp off16_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off24_R1, "plxvp off24_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off32_R1, "plxvp off32_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off0_R1, "pstb off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off8_R1, "pstb off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off16_R1, "pstb off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off32_R1, "pstb off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off0_R1, "pstd off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off8_R1, "pstd off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off16_R1, "pstd off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off32_R1, "pstd off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off0_R1, "psth off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off8_R1, "psth off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off16_R1, "psth off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off32_R1, "psth off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off0_R1, "pstq off0_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off8_R1, "pstq off8_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off16_R1, "pstq off16_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off32_R1, "pstq off32_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off64_R1, "pstq off64_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off0_R1, "pstw off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { 
&test_pstw_off8_R1, "pstw off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off16_R1, "pstw off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off32_R1, "pstw off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { NULL, NULL }, ++}; ++ ++/* Allow skipping of tests. */ ++unsigned long test_count=0xffff; ++unsigned long skip_count=0; ++unsigned long setup_only=0; ++ ++/* Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs. */ ++static jmp_buf mybuf; ++ ++/* This (testfunction_generic) is meant to handle all of the instruction ++ variations. The helpers set up the register and iterator values ++ as is appropriate for the instruction being tested. */ ++static void testfunction_generic (const char* instruction_name, ++ test_func_t test_function, ++ unsigned int ignore_flags, ++ char * cur_form) { ++ ++ identify_form_components (instruction_name , cur_form); ++ debug_show_form (instruction_name, cur_form); ++ set_up_iterators (); ++ debug_show_iter_ranges (); ++ initialize_buffer (0); ++ init_pcrelative_write_target (); ++ debug_dump_buffer (); ++ ++ for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) { ++ for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) { ++ for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) { ++ for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) { ++ CHECK_OVERRIDES ++ debug_show_current_iteration (); ++ // Be sure to initialize the target registers first. ++ initialize_target_registers (); ++ initialize_source_registers (); ++ printf ("%s", instruction_name); ++ print_register_header (); ++ printf( " =>"); fflush (stdout); ++ if (!setup_only) { ++ if (enable_setjmp) { ++ if ( setjmp ( mybuf ) ) { ++ printf("signal tripped. (FIXME)\n"); ++ continue; ++ } ++ } ++ (*test_function) (); ++ } ++ print_register_footer (); ++ print_result_buffer (); ++ print_pcrelative_write_target (); ++ printf ("\n"); ++ } ++ } ++ } ++ } ++} ++ ++void mykillhandler ( int x ) { longjmp (mybuf, 1); } ++void mysegvhandler ( int x ) { longjmp (mybuf, 1); } ++ ++static void do_tests ( void ) ++{ ++ int groupcount; ++ char * cur_form; ++ test_group_t group_function = &testfunction_generic; ++ test_list_t *tests = testgroup_generic; ++ ++ struct sigaction kill_action, segv_action; ++ struct sigaction old_kill_action, old_segv_action; ++ if (enable_setjmp) { ++ kill_action.sa_handler = mykillhandler; ++ segv_action.sa_handler = mysegvhandler; ++ sigemptyset ( &kill_action.sa_mask ); ++ sigemptyset ( &segv_action.sa_mask ); ++ kill_action.sa_flags = SA_NODEFER; ++ segv_action.sa_flags = SA_NODEFER; ++ sigaction ( SIGILL, &kill_action, &old_kill_action); ++ sigaction ( SIGSEGV, &segv_action, &old_segv_action); ++ } ++ ++ for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) { ++ cur_form = strdup(tests[groupcount].form); ++ current_test = tests[groupcount]; ++ identify_instruction_by_func_name (current_test.name); ++ if (groupcount < skip_count) continue; ++ if (verbose) printf("Test #%d ,", groupcount); ++ if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose); ++ (*group_function) (current_test.name, current_test.func, 0, cur_form ); ++ printf ("\n"); ++ if (groupcount >= (skip_count+test_count)) break; ++ } ++ if (debug_show_labels) printf("\n"); ++ printf ("All done. 
Tested %d different instruction groups\n", groupcount);
++}
++
++static void usage (void)
++{
++ fprintf(stderr,
++ "Usage: test_isa_XXX [OPTIONS]\n"
++ "\t-h: display this help and exit\n"
++ "\t-v: increase verbosity\n"
++ "\t-a <n>: limit number of a-iterations to <n>\n"
++ "\t-b <n>: limit number of b-iterations to <n>\n"
++ "\t-c <n>: limit number of c-iterations to <n>\n"
++ "\t-n <n>: limit to this number of tests.\n"
++ "\t-r <n>: run only test # <n> \n"
++ "\t\n"
++ "\t-j :enable setjmp to recover from illegal insns. \n"
++ "\t-m :(dev only?) lock VRM value to zero.\n"
++ "\t-z :(dev only?) lock MC value to zero.\n"
++ "\t-p :(dev only?) disable prefix instructions\n"
++ "\t-s <n>: skip <n> tests \n"
++ "\t-c <n>: stop after running <n> # of tests \n"
++ "\t-f : Do the test setup but do not actually execute the test instruction. \n"
++ );
++}
++
++int main (int argc, char **argv)
++{
++ int c;
++ while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) {
++ switch (c) {
++ case 'h':
++ usage();
++ return 0;
++
++ case 'v':
++ verbose++;
++ break;
++
++ /* Options related to limiting the test iterations. */
++ case 'a':
++ a_limit=atoi (optarg);
++ printf ("limiting a-iters to %ld.\n", a_limit);
++ break;
++ case 'b':
++ b_limit=atoi (optarg);
++ printf ("limiting b-iters to %ld.\n", b_limit);
++ break;
++ case 'c':
++ c_limit=atoi (optarg);
++ printf ("limiting c-iters to %ld.\n", c_limit);
++ break;
++ case 'n': // run this number of tests.
++ test_count=atoi (optarg);
++ printf ("limiting to %ld tests\n", test_count);
++ break;
++ case 'r': // run just test #.
++ skip_count=atoi (optarg);
++ test_count=0;
++ if (verbose) printf("Running test number %ld\n", skip_count);
++ break;
++ case 's': // skip this number of tests.
++ skip_count=atoi (optarg);
++ printf ("skipping %ld tests\n", skip_count);
++ break;
++
++ /* debug options. 
*/ ++ case 'd': ++ dump_tables=1; ++ printf("DEBUG:dump_tables.\n"); ++ break; ++ case 'f': ++ setup_only=1; ++ printf("DEBUG:setup_only.\n"); ++ break; ++ case 'j': ++ enable_setjmp=1; ++ printf ("DEBUG:setjmp enabled.\n"); ++ break; ++ case 'm': ++ vrm_override=1; ++ printf ("DEBUG:vrm override enabled.\n"); ++ break; ++ case 'p': ++ prefix_override=1; ++ printf ("DEBUG:prefix override enabled.\n"); ++ break; ++ case 'z': ++ mc_override=1; ++ printf ("DEBUG:MC override enabled.\n"); ++ break; ++ default: ++ usage(); ++ fprintf(stderr, "Unknown argument: '%c'\n", c); ++ } ++ } ++ ++ generic_prologue (); ++ build_vsx_table (); ++ build_args_table (); ++ build_float_vsx_tables (); ++ ++ if (dump_tables) { ++ dump_float_vsx_tables (); ++ dump_vsxargs (); ++ } ++ ++ do_tests (); ++ ++ return 0; ++} ++ ++#else // HAS_ISA_3_1 ++int main (int argc, char **argv) ++{ ++ printf("NO ISA 3.1 SUPPORT\n"); ++ return 0; ++} ++#endif +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp +new file mode 100644 +index 000000000..87594748f +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp +@@ -0,0 +1,138 @@ ++paddi 0_R1 => ffff0000 ++ ++paddi 12_R1 => ffff0012 ++ ++paddi 48_R1 => ffff0048 ++ ++paddi 98_R1 => ffff0098 ++ ++plbz off0_R1 => 1a ++ ++plbz off8_R1 => 1f ++ ++plbz off16_R1 => 1f ++ ++plbz off32_R1 => 1b ++ ++plbz off64_R1 => 1b ++ ++pld off0_R1 => e740000004100000 ++ ++pld off8_R1 => 4e800020 ++ ++pld off16_R1 => 6318001862f7001f ++ ++pld off32_R1 => 639c001c637b001b ++ ++pld off64_R1 => 639c001c637b001b ++ ++plha off0_R1 => 1a ++ ++plha off8_R1 => 1f ++ ++plha off16_R1 => 1f ++ ++plha off32_R1 => 1b ++ ++plha off64_R1 => 1b ++ ++plhz off0_R1 => 1a ++ ++plhz off8_R1 => 1f ++ ++plhz off16_R1 => 1f ++ ++plhz off32_R1 => 1b ++ ++plhz off64_R1 => 1b ++ ++plq off0_R1 => e34000000410001a 62d6001662b5001f ++ ++plq off8_R1 => 62d6001662b5001f 6318001862f7001f ++ ++plq off16_R1 => 6318001862f7001f 635a001a6339001b ++ ++plq off32_R1 => 639c001c637b001b 4e80003b ++ ++plq off48_R1 => 1a 62d6001662b5001f ++ ++plq off64_R1 => 639c001c637b001b 4e80003b ++ ++plwa off0_R1 => 4100000 ++ ++plwa off8_R1 => 4e800020 ++ ++plwa off16_R1 => 0 ++ ++plwa off32_R1 => 637b001b ++ ++plwa off64_R1 => 637b001b ++ ++plwz off0_R1 => 6100000 ++ ++plwz off8_R1 => 4e800020 ++ ++plwz off16_R1 => 0 ++ ++plwz off32_R1 => 637b001b ++ ++plwz off64_R1 => 637b001b ++ ++plxvp off0_R1 => 6318001862f70017 635a001a63390019 ea80000004100000 62d6001662b50015 ++ ++plxvp off8_R1 => 635a001a63390019 639c001c637b001b 62d6001662b50015 6318001862f70017 ++ ++plxvp off16_R1 => 639c001c637b001b 000000004e800020 6318001862f70017 635a001a63390019 ++ ++plxvp off24_R1 => 000000004e800020 0000000000000000 635a001a63390019 639c001c637b001b ++ ++plxvp off32_R1 => 0000000000000000 62d6001662b50015 639c001c637b001b 000000004e800020 ++ ++pstb off0_R1 102030405060708 => 08 ++ ++pstb off8_R1 102030405060708 => 08 ++ ++pstb off16_R1 102030405060708 => 08 ++ ++pstb off32_R1 102030405060708 => 08 ++ ++pstd off0_R1 102030405060708 => 0102030405060708 ++ ++pstd off8_R1 102030405060708 => 0102030405060708 ++ ++pstd off16_R1 102030405060708 => 0102030405060708 ++ ++pstd off32_R1 102030405060708 => 0102030405060708 ++ ++psth off0_R1 102030405060708 => 
0708
++
++psth off8_R1 102030405060708 => 0708
++
++psth off16_R1 102030405060708 => 0708
++
++psth off32_R1 102030405060708 => 0708
++
++pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++
++pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++
++pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++
++pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++
++pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918
++
++pstw off0_R1 102030405060708 => 05060708
++
++pstw off8_R1 102030405060708 => 05060708
++
++pstw off16_R1 102030405060708 => 05060708
++
++pstw off32_R1 102030405060708 => 05060708
++
++All done. Tested 66 different instruction groups
+diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest
+new file mode 100644
+index 000000000..61d7f65a1
+--- /dev/null
++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest
+@@ -0,0 +1,2 @@
++prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1
++prog: test_isa_3_1_R1_RT
+diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.c b/none/tests/ppc64/test_isa_3_1_R1_XT.c
+new file mode 100644
+index 000000000..58885b8d3
+--- /dev/null
++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.c
+@@ -0,0 +1,534 @@
++/*
++ * Valgrind testcase for PowerPC ISA 3.1
++ *
++ * Copyright (C) 2019-2020 Will Schmidt
++ *
++ * 64bit build:
++ * gcc -Winline -Wall -g -O -mregnames -maltivec -m64
++ */
++
++/*
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++#include <config.h>
++#ifdef HAS_ISA_3_1
++#include <stdio.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <string.h>
++#include <signal.h>
++#include <setjmp.h>
++#include <unistd.h>
++
++#include <assert.h>
++#include <malloc.h>
++#include <altivec.h>
++
++/* Condition Register fields.
++ These are used to capture the condition register values immediately after
++ the instruction under test is executed. This is done to help prevent other
++ test overhead (switch statements, result compares, etc) from disturbing
++ the test case results. 
*/ ++unsigned long current_cr; ++unsigned long current_fpscr; ++ ++struct test_list_t current_test; ++ ++#include "isa_3_1_helpers.h" ++static void test_pstxvp_off0_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+0(0),1"); ++} ++static void test_pstxvp_off16_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+16(0),1"); ++} ++static void test_pstxvp_off32_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+32(0),1"); ++} ++static void test_pstxvp_off48_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+48(0),1"); ++} ++static void test_plfd_64_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +64(0), 1"); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plfd_32_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +32(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_16_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +16(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_8_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +8(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_4_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +4(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_0_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +0(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_64_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +64(0), 1"); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plfs_32_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +32(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_16_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +16(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_8_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +8(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_4_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +4(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_0_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +0(0), 1"); ++ PAD_ORI ++} ++static void test_pstfd_32_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+32(0), 1"); ++} ++static void test_pstfd_16_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+16(0), 1"); ++} ++static void test_pstfd_8_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+8(0), 1"); ++} ++static void test_pstfd_4_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+4(0), 1"); ++} ++static void test_pstfd_0_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+0(0), 1"); ++} ++static void test_pstfs_32_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+32(0), 1"); ++} ++static void test_pstfs_16_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+16(0), 1"); ++} ++static void test_pstfs_8_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+8(0), 1"); ++} ++static void test_pstfs_4_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+4(0), 1"); ++} ++static void test_pstfs_0_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+0(0), 1"); ++} ++static void test_plxsd_64_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +64(0), 1" : "=v" (vrt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plxsd_32_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; plxsd %0, +32(0), 1" : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_16_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_8_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +8(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_4_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +4(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_0_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +0(0), 1; pnop;pnop;pnop; " : 
"=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_64_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +64(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plxssp_32_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +32(0), 1; pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_16_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_8_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +8(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_4_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +4(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_0_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +0(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++/* Follow the short-range plxv instructions with nop in order to ++ pad out subsequent instructions. When written there are found ++ to be fluctuations in the instructions to store the result back ++ into the target variable. (pla,pstxv...). ++ */ ++static void test_plxv_16_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +16(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_8_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +8(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_4_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +4(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_0_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +0(0), 1; pnop;pnop;pnop; " : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_pstxsd_64_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+64(0), 1" ); ++} ++static void test_pstxsd_32_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+32(0), 1" ); ++} ++static void test_pstxsd_16_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+16(0), 1" ); ++} ++static void test_pstxsd_8_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+8(0), 1" ); ++} ++static void test_pstxsd_4_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+4(0), 1" ); ++} ++static void test_pstxsd_0_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+0(0), 1" ); ++} ++static void test_pstxssp_64_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+64(0), 1" ); ++} ++static void test_pstxssp_32_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+32(0), 1"); ++} ++static void test_pstxssp_16_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+16(0), 1"); ++} ++static void test_pstxssp_8_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+8(0), 1"); ++} ++static void test_pstxssp_4_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+4(0), 1"); ++} ++static void test_pstxssp_0_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+0(0), 1"); ++} ++static void test_pstxv_16_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+16(0), 1" :: "wa" (vec_xs)); ++} ++static void test_pstxv_8_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+8(0), 1" :: "wa" (vec_xs)); ++} ++static void test_pstxv_4_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+4(0), 1" :: "wa" (vec_xs)); ++} ++static void test_pstxv_0_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+0(0), 1" :: "wa" (vec_xs)); ++} ++ ++static test_list_t testgroup_generic[] = { ++ { &test_plfd_0_R1, "plfd 0_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { 
&test_plfd_4_R1, "plfd 4_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_8_R1, "plfd 8_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_16_R1, "plfd 16_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_32_R1, "plfd 32_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_64_R1, "plfd 64_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_0_R1, "plfs 0_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_4_R1, "plfs 4_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_8_R1, "plfs 8_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_16_R1, "plfs 16_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_32_R1, "plfs 32_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_64_R1, "plfs 64_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plxsd_0_R1, "plxsd 0_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_4_R1, "plxsd 4_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_8_R1, "plxsd 8_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_16_R1, "plxsd 16_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_32_R1, "plxsd 32_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_64_R1, "plxsd 64_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxssp_0_R1, "plxssp 0_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_4_R1, "plxssp 4_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_8_R1, "plxssp 8_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_16_R1, "plxssp 16_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_32_R1, "plxssp 32_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_64_R1, "plxssp 64_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxv_0_R1, "plxv 0_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_4_R1, "plxv 4_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_8_R1, "plxv 8_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_16_R1, "plxv 16_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_pstfd_0_R1, "pstfd 0_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_4_R1, "pstfd 4_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_8_R1, "pstfd 8_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_16_R1, "pstfd 16_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_32_R1, "pstfd 32_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfs_0_R1, "pstfs 0_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_4_R1, "pstfs 4_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_8_R1, "pstfs 8_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_16_R1, "pstfs 16_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_32_R1, "pstfs 32_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstxsd_0_R1, "pstxsd 0_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_4_R1, "pstxsd 4_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_8_R1, "pstxsd 8_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_16_R1, "pstxsd 16_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_32_R1, "pstxsd 32_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_64_R1, "pstxsd 64_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_0_R1, "pstxssp 0_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_4_R1, "pstxssp 4_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_8_R1, "pstxssp 8_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_16_R1, "pstxssp 16_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_32_R1, "pstxssp 32_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_64_R1, "pstxssp 64_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off0_R1, "pstxvp off0_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off16_R1, 
"pstxvp off16_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off32_R1, "pstxvp off32_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off48_R1, "pstxvp off48_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_0_R1, "pstxv 0_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_4_R1, "pstxv 4_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_8_R1, "pstxv 8_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_16_R1, "pstxv 16_R1", "XS,D(RA),R"}, /* bcwp */ ++ { NULL, NULL }, ++}; ++ ++/* Allow skipping of tests. */ ++unsigned long test_count=0xffff; ++unsigned long skip_count=0; ++unsigned long setup_only=0; ++ ++/* Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs. */ ++static jmp_buf mybuf; ++ ++/* This (testfunction_generic) is meant to handle all of the instruction ++ variations. The helpers set up the register and iterator values ++ as is appropriate for the instruction being tested. */ ++static void testfunction_generic (const char* instruction_name, ++ test_func_t test_function, ++ unsigned int ignore_flags, ++ char * cur_form) { ++ ++ identify_form_components (instruction_name , cur_form); ++ debug_show_form (instruction_name, cur_form); ++ set_up_iterators (); ++ debug_show_iter_ranges (); ++ initialize_buffer (0); ++ init_pcrelative_write_target (); ++ debug_dump_buffer (); ++ ++ for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) { ++ for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) { ++ for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) { ++ for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) { ++ CHECK_OVERRIDES ++ debug_show_current_iteration (); ++ // Be sure to initialize the target registers first. ++ initialize_target_registers (); ++ initialize_source_registers (); ++ vec_xa[0]=0x1234; ++ vec_xa[1]=0x4567; ++ printf ("%s", instruction_name); ++ print_register_header (); ++ printf( " =>"); fflush (stdout); ++ if (!setup_only) { ++ if (enable_setjmp) { ++ if ( setjmp ( mybuf ) ) { ++ printf("signal tripped. (FIXME)\n"); ++ continue; ++ } ++ } ++ (*test_function) (); ++ } ++ print_register_footer (); ++ print_result_buffer (); ++ print_pcrelative_write_target (); ++ printf ("\n"); ++ } ++ } ++ } ++ } ++} ++ ++void mykillhandler ( int x ) { longjmp (mybuf, 1); } ++void mysegvhandler ( int x ) { longjmp (mybuf, 1); } ++ ++static void do_tests ( void ) ++{ ++ int groupcount; ++ char * cur_form; ++ test_group_t group_function = &testfunction_generic; ++ test_list_t *tests = testgroup_generic; ++ ++ struct sigaction kill_action, segv_action; ++ struct sigaction old_kill_action, old_segv_action; ++ if (enable_setjmp) { ++ kill_action.sa_handler = mykillhandler; ++ segv_action.sa_handler = mysegvhandler; ++ sigemptyset ( &kill_action.sa_mask ); ++ sigemptyset ( &segv_action.sa_mask ); ++ kill_action.sa_flags = SA_NODEFER; ++ segv_action.sa_flags = SA_NODEFER; ++ sigaction ( SIGILL, &kill_action, &old_kill_action); ++ sigaction ( SIGSEGV, &segv_action, &old_segv_action); ++ } ++ ++ for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) { ++ cur_form = strdup(tests[groupcount].form); ++ current_test = tests[groupcount]; ++ identify_instruction_by_func_name (current_test.name); ++ if (groupcount < skip_count) continue; ++ if (verbose) printf("Test #%d ,", groupcount); ++ if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose); ++ (*group_function) (current_test.name, current_test.func, 0, cur_form ); ++ printf ("\n"); ++ if (groupcount >= (skip_count+test_count)) break; ++ } ++ if (debug_show_labels) printf("\n"); ++ printf ("All done. 
Tested %d different instruction groups\n", groupcount); ++} ++ ++static void usage (void) ++{ ++ fprintf(stderr, ++ "Usage: test_isa_XXX [OPTIONS]\n" ++ "\t-h: display this help and exit\n" ++ "\t-v: increase verbosity\n" ++ "\t-a : limit number of a-iterations to \n" ++ "\t-b : limit number of b-iterations to \n" ++ "\t-c : limit number of c-iterations to \n" ++ "\t-n : limit to this number of tests.\n" ++ "\t-r : run only test # \n" ++ "\t\n" ++ "\t-j :enable setjmp to recover from illegal insns. \n" ++ "\t-m :(dev only?) lock VRM value to zero.\n" ++ "\t-z :(dev only?) lock MC value to zero.\n" ++ "\t-p :(dev only?) disable prefix instructions\n" ++ "\t-s : skip tests \n" ++ "\t-c : stop after running # of tests \n" ++ "\t-f : Do the test setup but do not actually execute the test instruction. \n" ++ ); ++} ++ ++int main (int argc, char **argv) ++{ ++ int c; ++ while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) { ++ switch (c) { ++ case 'h': ++ usage(); ++ return 0; ++ ++ case 'v': ++ verbose++; ++ break; ++ ++ /* Options related to limiting the test iterations. */ ++ case 'a': ++ a_limit=atoi (optarg); ++ printf ("limiting a-iters to %ld.\n", a_limit); ++ break; ++ case 'b': ++ b_limit=atoi (optarg); ++ printf ("limiting b-iters to %ld.\n", b_limit); ++ break; ++ case 'c': ++ c_limit=atoi (optarg); ++ printf ("limiting c-iters to %ld.\n", c_limit); ++ break; ++ case 'n': // run this number of tests. ++ test_count=atoi (optarg); ++ printf ("limiting to %ld tests\n", test_count); ++ break; ++ case 'r': // run just test #. ++ skip_count=atoi (optarg); ++ test_count=0; ++ if (verbose) printf("Running test number %ld\n", skip_count); ++ break; ++ case 's': // skip this number of tests. ++ skip_count=atoi (optarg); ++ printf ("skipping %ld tests\n", skip_count); ++ break; ++ ++ /* debug options. 
*/ ++ case 'd': ++ dump_tables=1; ++ printf("DEBUG:dump_tables.\n"); ++ break; ++ case 'f': ++ setup_only=1; ++ printf("DEBUG:setup_only.\n"); ++ break; ++ case 'j': ++ enable_setjmp=1; ++ printf ("DEBUG:setjmp enabled.\n"); ++ break; ++ case 'm': ++ vrm_override=1; ++ printf ("DEBUG:vrm override enabled.\n"); ++ break; ++ case 'p': ++ prefix_override=1; ++ printf ("DEBUG:prefix override enabled.\n"); ++ break; ++ case 'z': ++ mc_override=1; ++ printf ("DEBUG:MC override enabled.\n"); ++ break; ++ default: ++ usage(); ++ fprintf(stderr, "Unknown argument: '%c'\n", c); ++ } ++ } ++ ++ generic_prologue (); ++ build_vsx_table (); ++ build_args_table (); ++ build_float_vsx_tables (); ++ ++ if (dump_tables) { ++ dump_float_vsx_tables (); ++ dump_vsxargs (); ++ } ++ ++ do_tests (); ++ ++ return 0; ++} ++ ++#else // HAS_ISA_3_1 ++int main (int argc, char **argv) ++{ ++ printf("NO ISA 3.1 SUPPORT\n"); ++ return 0; ++} ++#endif +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp +new file mode 100644 +index 000000000..48d591f4d +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp +@@ -0,0 +1,127 @@ ++plfd 0_R1 =>_ -4.903986e+55 _ cb80000006100000, 0 ++ ++plfd 4_R1 =>_ 3.095878e+167 _ 62b50015cb800004, 0 ++ ++plfd 8_R1 =>_ 1.297320e+168 _ 62d6001662b50015, 0 ++ ++plfd 16_R1 =>_ 2.264413e+169 _ 6318001862f70017, 0 ++ ++plfd 32_R1 =>_ 6.763045e+171 _ 639c001c637b001b, 0 ++ ++plfd 64_R1 =>_ 6.763045e+171 _ 639c001c637b001b, 0 ++ ++plfs 0_R1 =>_ 2.708339e-35 _ 38c2000000000000, 0 ++ ++plfs 4_R1 =>_ -2.560001e+02 _ c070000080000000, 0 ++ ++plfs 8_R1 =>_ 1.669433e+21 _ 4456a002a0000000, 0 ++ ++plfs 16_R1 =>_ 2.278176e+21 _ 445ee002e0000000, 0 ++ ++plfs 32_R1 =>_ 4.630140e+21 _ 446f600360000000, 0 ++ ++plfs 64_R1 =>_ 4.630140e+21 _ 446f600360000000, 0 ++ ++plxsd 0_R1 => a800000004100000,0000000000000000 -5.07588375e-116 +Zero ++ ++plxsd 4_R1 => 7000000a8000004,0000000000000000 5.77662562e-275 +Zero ++ ++plxsd 8_R1 => 700000060000000,0000000000000000 5.77662407e-275 +Zero ++ ++plxsd 16_R1 => 7000000,0000000000000000 +Den +Zero ++ ++plxsd 32_R1 => 6339001963180018,0000000000000000 9.43505226e+169 +Zero ++ ++plxsd 64_R1 => 6339001963180018,0000000000000000 9.43505226e+169 +Zero ++ ++plxssp 0_R1 => 3882000000000000,0000000000000000 6.19888e-05 +Zero +Zero +Zero ++ ++plxssp 4_R1 => bd80000080000000,0000000000000000 -6.25000e-02 -Zero +Zero +Zero ++ ++plxssp 8_R1 => 38e0000000000000,0000000000000000 1.06812e-04 +Zero +Zero +Zero ++ ++plxssp 16_R1 => 38e0000000000000,0000000000000000 1.06812e-04 +Zero +Zero +Zero ++ ++plxssp 32_R1 => 445ac002c0000000,0000000000000000 8.75000e+02 -2.00000e+00 +Zero +Zero ++ ++plxssp 64_R1 => 446b400340000000,0000000000000000 9.41000e+02 2.00000e+00 +Zero +Zero ++ ++plxv 0_R1 => c800000004100000 7000000 ++ ++plxv 4_R1 => 7000000c8000004 700000000000000 ++ ++plxv 8_R1 => 7000000 7000000 ++ ++plxv 16_R1 => 7000000 7000000 ++ ++pstfd 0_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 0_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 4_R1 43dfe000003fe000 43eff000000ff000 => e000003f e00043df ++pstfd 4_R1 43eff000000ff000 43efefffffcff000 => f000000f f00043ef ++ ++pstfd 8_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df 
++pstfd 8_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 16_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 16_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 32_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 32_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfs 0_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 0_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 4_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 4_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 8_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 8_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 16_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 16_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 32_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 32_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstxsd 0_R1 => 0000000000000000 ++ ++pstxsd 4_R1 => 00000000 00000000 ++ ++pstxsd 8_R1 => 0000000000000000 ++ ++pstxsd 16_R1 => 0000000000000000 ++ ++pstxsd 32_R1 => 0000000000000000 ++ ++pstxsd 64_R1 => 0000000000000000 ++ ++pstxssp 0_R1 => 00000000 ++ ++pstxssp 4_R1 => 00000000 ++ ++pstxssp 8_R1 => 00000000 ++ ++pstxssp 16_R1 => 00000000 ++ ++pstxssp 32_R1 => 00000000 ++ ++pstxssp 64_R1 => 00000000 ++ ++pstxvp off0_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off16_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off32_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off48_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxv 0_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 4_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 8_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 16_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7f fffeff7f00007f80 0000ff80 ++ ++All done. Tested 58 different instruction groups +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest +new file mode 100644 +index 000000000..7331aafad +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest +@@ -0,0 +1,2 @@ ++prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 ++prog: test_isa_3_1_R1_XT +diff --git a/none/tests/ppc64/test_isa_3_1_common.c b/none/tests/ppc64/test_isa_3_1_common.c +index 7c3dc6f00..b3320277b 100644 +--- a/none/tests/ppc64/test_isa_3_1_common.c ++++ b/none/tests/ppc64/test_isa_3_1_common.c +@@ -134,11 +134,13 @@ bool uses_acc_vsrs; + bool uses_pmsk; + bool uses_buffer; // Buffer related. + bool uses_load_buffer, uses_store_buffer, uses_any_buffer; ++bool updates_byte, updates_halfword, updates_word; // output helpers. + bool uses_quad; + unsigned long output_mask; // Output field special handling. 
+ bool instruction_is_sp, instruction_is_sp_estimate; + bool instruction_is_dp, instruction_is_dp_estimate; + bool instruction_is_b16; ++bool instruction_is_relative; + + unsigned long long min (unsigned long long a, unsigned long long b) { + if ( a < b ) +@@ -236,6 +238,18 @@ void identify_form_components (const char *instruction_name, + (strncmp (instruction_name, "pmst", 4) == 0) || + (strncmp (instruction_name, "pst", 3) == 0) || + (strncmp (instruction_name, "st", 2) == 0)); ++ updates_byte = ( ++ (strncmp (instruction_name, "pstb", 4) == 0) ); ++ updates_halfword = ( ++ (strncmp (instruction_name, "psth", 4) == 0) || ++ (strncmp (instruction_name, "pstfs", 4) == 0) || ++ (strncmp (instruction_name, "pstxsd", 4) == 0) || ++ (strncmp (instruction_name, "pstxssp", 4) == 0) || ++ (strncmp (instruction_name, "pstxv", 4) == 0) || ++ (strncmp (instruction_name, "psfs", 4) == 0) ); ++ updates_word = ( ++ (strncmp (instruction_name, "pstw", 4) == 0) ); ++ + uses_any_buffer = (strstr (cur_form, "(RA)") != NULL); + uses_buffer = uses_any_buffer||uses_load_buffer||uses_store_buffer; + +@@ -268,6 +282,15 @@ void identify_form_components (const char *instruction_name, + instruction_is_b16 = ( current_test.mask & B16_MASK ); + } + ++/* Parse the provided function name to set assorted values. ++ In particular, set an indicator when the instruction test has ++ indicated it will run with R==1 that indicates it is a PC-relative ++ instruction. Those tests should all have "_R1" as part of ++ the function name. */ ++void identify_instruction_by_func_name(const char * function_name) { ++ instruction_is_relative = ( (strstr (function_name, "R1") != NULL)); ++} ++ + void display_form_components (char * cur_form) { + printf (" %s\n", cur_form); + printf ("Instruction form elements: "); +@@ -288,7 +311,7 @@ void display_form_components (char * cur_form) { + if (has_frbp) printf ("frbp "); + if (has_frs) printf ("frs "); + if (has_frsp) printf ("frsp "); +- if (has_frt) printf ("frt "); ++ if (has_frt) printf ("frt%s ",(instruction_is_relative)?"-raw":""); + if (has_frtp) printf ("frtp "); + if (has_xa) printf ("xa "); + if (has_xap) printf ("xap "); +@@ -298,6 +321,7 @@ void display_form_components (char * cur_form) { + if (has_xsp) printf ("xsp "); + if (has_xt) printf ("xt "); + if (has_xtp) printf ("xtp "); ++ if (instruction_is_relative) printf ("R==1 "); + if (uses_acc_src) printf ("AS "); + if (uses_acc_dest) printf ("AT "); + printf ("\n"); +@@ -991,6 +1015,107 @@ if (debug_show_values) printf (" buffer:"); + } + } + ++/* **** Reloc Buffer **************************************** */ ++/* Create a large buffer to be the destination for pc-relative ++ * writes. This test is built with linker hints in order ++ * to ensure our buffer, stored in the .bss section, is at a ++ * mostly known offset from the instructions being exercised, ++ * so a hardcoded offset from the PC (pc-relative) will be ++ * on-target. ++ * If there are significant reworks to the code, the bss or ++ * text sections, or the offsets used may need to change. ++ * ++ * The linker hints are specifically -Tbss and -Ttext. ++ * gcc foo.c test_isa_3_1_common.c -I../../../ -Wl,-Tbss 0x20000 -Wl,-Ttext 0x40000 ++ */ ++ /* RELOC_BUFFER_SIZE is defined to 0x1000 in isa_3_1_helpers.h */ ++#define RELOC_BUFFER_PATTERN 0x0001000100010001 ++volatile unsigned long long pcrelative_write_target[RELOC_BUFFER_SIZE]; ++ ++/* Initialize the buffer to known values. 
*/ ++void init_pcrelative_write_target() { ++ int i; ++ for (i=0;i<RELOC_BUFFER_SIZE;i++) ++ pcrelative_write_target[i]=RELOC_BUFFER_PATTERN; ++} ++ ++/* Print the contents of the pcrelative_write_target buffer. Only ++ entries that differ from the initial pattern are printed. */ ++void print_pcrelative_write_target () { ++ int i,z; ++ int rshift; ++ unsigned long long ref_value, curr_value; ++ unsigned long long init_token, curr_token; ++ ref_value = RELOC_BUFFER_PATTERN; ++ for (i=0;i<RELOC_BUFFER_SIZE;i++) { ++ curr_value = pcrelative_write_target[i]; ++ if (curr_value != ref_value) { ++ if (verbose) printf("block[%d] %llx => %llx\n",i,ref_value,curr_value); ++ if (updates_byte) { ++ for (z=0;z<8;z++) { ++ rshift=z*8; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xff; ++ curr_token = (curr_value>>rshift) & 0xff; ++ if (verbose) ++ printf("wms byte:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token && (updates_byte||updates_halfword||updates_word) ) { ++ printf("%2s"," "); ++ } else { ++ printf("%02llx",curr_token); ++ } ++ } ++ } ++ else if (updates_halfword) { ++ for (z=0;z<4;z++) { ++ rshift=z*16; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xffff; ++ curr_token = (curr_value>>rshift) & 0xffff; ++ if (verbose) ++ printf("wms half:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token) { ++ printf("%2s"," "); ++ } else { ++ printf("%04llx",curr_token); ++ } ++ } ++ } ++ else if (updates_word) { ++ for (z=0;z<2;z++) { ++ rshift=z*32; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xffffffff; ++ curr_token = (curr_value>>rshift) & 0xffffffff; ++ if (verbose) ++ printf("wms word:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token ) { ++ printf("%2s"," "); ++ } else { ++ printf("%08llx",curr_token); ++ } ++ } ++ } ++ else { ++ printf("%016llx ",curr_value); ++ } ++ } ++ } ++} ++ ++/* Helper that returns the address of the pcrelative_write_target buffer. ++ Due to variances in where the sections land in memory, this value is ++ used to normalize the results. (see paddi tests for usage). */ ++unsigned long long pcrelative_buff_addr(int x) { ++ /* Return the base address of the array. The base address will be ++ a function of the code load address. */ ++ return (unsigned long long) &pcrelative_write_target[x]; ++} ++ + void print_undefined () { + if (debug_show_values) + printf (" [Undef]"); +@@ -1339,7 +1464,7 @@ void print_frt () { + /* If the result is a dfp128 value, the dfp128 value is + contained in the frt, frtp values which are split across + a pair of VSRs. */ +- if (uses_dfp128_output) { ++ if (!instruction_is_relative && uses_dfp128_output) { + if (verbose) print_vsr (28); + if (verbose) print_vsr (29); + value1 = get_vsrhd_vs28 (); +@@ -1347,7 +1472,12 @@ void print_frt () { + dissect_dfp128_float (value1, value3); + } else { + if (debug_show_raw_values) generic_print_float_as_hex (frt); +- printf (" %e", frt); ++ if (instruction_is_relative) { ++ printf ("_ %e _ ", frt); ++ print_vsr (28); ++ } else { ++ printf (" %e", frt); ++ } + if (has_frtp) { + if (debug_show_raw_values) generic_print_float_as_hex (frtp); + printf (" %e", frtp); +@@ -1652,7 +1782,15 @@ void print_all() { + void print_register_header () { + post_test = 0; + if (debug_show_all_regs) print_all(); +- if (has_ra) print_ra (); ++ ++ if (has_ra) { ++ /* Suppress the print of RA if the instruction has ++ R==1, since the ra value must be zero for the ++ instruction to be valid. */ ++ if (!instruction_is_relative) ++ print_ra(); ++ } ++ + if (has_rb) print_rb (); + if (has_rc) print_rc (); + if (has_rs) print_rs(); +@@ -1894,6 +2032,11 @@ void set_up_iterators () { + } else { + a_start=0; b_start=0; c_start=0; m_start=0; + } ++ /* Special casing for R==1 tests.
*/ ++ if (instruction_is_relative) { ++ a_iters = 1; ++ m_start=3; m_iters=4; ++ } + if ((has_vra+has_vrb+has_vrc+has_vrm+has_xa+has_xb+uses_MC > 2) && + (!debug_enable_all_iters)) { + /* Instruction tests using multiple fields will generate a lot of +@@ -2196,15 +2339,12 @@ void initialize_source_registers () { + vrb[0] = vsxargs[ (vrbi ) % isr_modulo]; + vrb[1] = vsxargs[ (vrbi+1) % isr_modulo]; + } +- +- if (has_xa) { +- vec_xa[0] = vsxargs[ (vrai ) % isr_modulo]; +- vec_xa[1] = vsxargs[ (vrai+1) % isr_modulo]; +- } +- if (has_xb) { +- vec_xb[0] = vsxargs[ (vrbi ) % isr_modulo]; +- vec_xb[1] = vsxargs[ (vrbi+1) % isr_modulo]; +- } ++ ++ if (instruction_is_relative) { ++ /* for pstxsd and friends using R=1 */ ++ vec_xa[0] = vsxargs[ (vrai+2 ) % isr_modulo]; ++ vec_xa[1] = vsxargs[ (vrai+3 ) % isr_modulo]; ++ } + + // xap 'shares' with the second half of an xa-pair. + if (has_xap ) { diff --git a/SOURCES/valgrind-3.18.1-ppc-pstq.patch b/SOURCES/valgrind-3.18.1-ppc-pstq.patch new file mode 100644 index 0000000..2e23d18 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc-pstq.patch @@ -0,0 +1,47 @@ +commit ae8c6de01417023e78763de145b1c0e6ddd87277 +Author: Carl Love +Date: Wed Oct 20 20:40:13 2021 +0000 + + Fix for the prefixed stq instruction in PC relative mode. + + The pstq instruction for R=1 was not using the correct effective address. + The EA_hi and EA_lo should have been based on the value of EA as calculated + by the function calculate_prefix_EA. Unfortunately, the EA_hi and EA_lo + addresses were still using the previous code (not PC relative) to calculate + the address from the contents of RA plus the offset. + +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index 8afd77490..543fa9574 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -9838,23 +9838,24 @@ static Bool dis_int_store_ds_prefix ( UInt prefix, + if (host_endness == VexEndnessBE) { + + /* upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val ) ); ++ assign( EA_hi, mkexpr(EA)); + + /* lower 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+8 ) ); ++ assign( EA_lo, binop(Iop_Add64, mkexpr(EA), mkU64(8))); ++ + } else { + /* upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+8 ) ); ++ assign( EA_hi, binop(Iop_Add64, mkexpr(EA), mkU64(8))); + + /* lower 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val ) ); ++ assign( EA_lo, mkexpr(EA)); + } + } else { + /* upper half of upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+4 ) ); ++ assign( EA_hi, binop(Iop_Add32, mkexpr(EA), mkU32(4))); + + /* lower half of upper 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+12 ) ); ++ assign( EA_lo, binop(Iop_Add32, mkexpr(EA), mkU32(12))); + } + + /* Note, the store order for stq instruction is the same for BE diff --git a/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch b/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch new file mode 100644 index 0000000..bb36c80 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch @@ -0,0 +1,60 @@ +commit 6e08ee95f7f1b1c3fd434fa380cc5b2cc3e3f7c7 +Author: Carl Love +Date: Fri Oct 29 16:30:33 2021 -0500 + + Bug 444571 - PPC, fix the lxsibzx and lxsihzx so they only load their respective sized data. + + The lxsibzx was doing a 64-bit load. The result was initializing + additional bytes in the register that should not have been initialized. + The memcheck/tests/linux/dlclose_leak test detected the issue.
The + code generation uses lxsibzx and stxsibx with -mcpu=power9. Previously + the lbz and stb instructions were generated. + + The same issue was noted and fixed with the lxsihzx instruction. The + memcheck/tests/linux/badrw test now passes as well. + + https://bugs.kde.org/show_bug.cgi?id=444571 + +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index d90d566ed..8afd77490 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -25359,19 +25359,17 @@ dis_vx_load ( UInt prefix, UInt theInstr ) + + else + irx_addr = mkexpr( EA ); +- +- byte = load( Ity_I64, irx_addr ); ++ /* byte load */ ++ byte = load( Ity_I8, irx_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, +- binop( Iop_And64, +- byte, +- mkU64( 0xFF ) ), ++ unop( Iop_8Uto64, byte ), + mkU64( 0 ) ) ); + break; + } + + case 0x32D: // lxsihzx + { +- IRExpr *byte; ++ IRExpr *hword; + IRExpr* irx_addr; + + DIP("lxsihzx %u,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); +@@ -25382,11 +25380,10 @@ dis_vx_load ( UInt prefix, UInt theInstr ) + else + irx_addr = mkexpr( EA ); + +- byte = load( Ity_I64, irx_addr ); ++ hword = load( Ity_I16, irx_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, +- binop( Iop_And64, +- byte, +- mkU64( 0xFFFF ) ), ++ unop( Iop_16Uto64, ++ hword ), + mkU64( 0 ) ) ); + break; + } diff --git a/SOURCES/valgrind-3.18.1-rseq-enosys.patch b/SOURCES/valgrind-3.18.1-rseq-enosys.patch new file mode 100644 index 0000000..40664c3 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-rseq-enosys.patch @@ -0,0 +1,180 @@ +commit 67e7b20eb256baec225b3d7df1f03d731bf5e939 +Author: Mark Wielaard +Date: Fri Dec 10 17:41:59 2021 +0100 + + Implement linux rseq syscall as ENOSYS + + This implements rseq for amd64, arm, arm64, ppc32, ppc64, + s390x and x86 linux as ENOSYS (without warning). + + glibc will start using rseq to accelerate sched_getcpu, if + available. This would cause a warning from valgrind every + time a new thread is started. + + Real rseq (restartable sequences) support is pretty hard, so + for now just explicitly return ENOSYS (just like we do for clone3). 
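+
+    As an illustration (a minimal sketch, not part of this fix; the probe
+    pattern and messages are hypothetical), a client that probes rseq in
+    the usual way keeps working when ENOSYS comes back, which is why no
+    warning is needed:
+
+        #define _GNU_SOURCE
+        #include <errno.h>
+        #include <stdio.h>
+        #include <sys/syscall.h>
+        #include <unistd.h>
+
+        int main(void)
+        {
+        #ifdef __NR_rseq
+            /* Deliberately invalid registration: ENOSYS means the kernel
+               (or Valgrind, with this patch) does not know the syscall;
+               any other failure, e.g. EINVAL, means it is implemented. */
+            long ret = syscall(__NR_rseq, NULL, 0, 0, 0);
+            if (ret == -1 && errno == ENOSYS)
+                printf("rseq unavailable, using the fallback path\n");
+            else
+                printf("rseq known (ret=%ld errno=%d)\n", ret, errno);
+        #else
+            printf("__NR_rseq not defined by these headers\n");
+        #endif
+            return 0;
+        }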
+ + https://sourceware.org/pipermail/libc-alpha/2021-December/133656.html + +diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c +index 5062324a1..18b25f80a 100644 +--- a/coregrind/m_syswrap/syswrap-amd64-linux.c ++++ b/coregrind/m_syswrap/syswrap-amd64-linux.c +@@ -862,6 +862,8 @@ static SyscallTableEntry syscall_table[] = { + + LINXY(__NR_statx, sys_statx), // 332 + ++ GENX_(__NR_rseq, sys_ni_syscall), // 334 ++ + LINX_(__NR_membarrier, sys_membarrier), // 324 + + LINX_(__NR_copy_file_range, sys_copy_file_range), // 326 +diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c +index 556dd844b..d583cef0c 100644 +--- a/coregrind/m_syswrap/syswrap-arm-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm-linux.c +@@ -1024,6 +1024,7 @@ static SyscallTableEntry syscall_main_table[] = { + LINX_(__NR_pwritev2, sys_pwritev2), // 393 + + LINXY(__NR_statx, sys_statx), // 397 ++ GENX_(__NR_rseq, sys_ni_syscall), // 398 + + LINXY(__NR_clock_gettime64, sys_clock_gettime64), // 403 + LINX_(__NR_clock_settime64, sys_clock_settime64), // 404 +diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c +index b87107727..2066a38ea 100644 +--- a/coregrind/m_syswrap/syswrap-arm64-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm64-linux.c +@@ -823,8 +823,9 @@ static SyscallTableEntry syscall_main_table[] = { + // (__NR_pkey_mprotect, sys_ni_syscall), // 288 + // (__NR_pkey_alloc, sys_ni_syscall), // 289 + // (__NR_pkey_free, sys_ni_syscall), // 290 ++ LINXY(__NR_statx, sys_statx), // 291 + +- LINXY(__NR_statx, sys_statx), // 397 ++ GENX_(__NR_rseq, sys_ni_syscall), // 293 + + LINXY(__NR_io_uring_setup, sys_io_uring_setup), // 425 + LINXY(__NR_io_uring_enter, sys_io_uring_enter), // 426 +diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c +index 6263ab845..637b2504e 100644 +--- a/coregrind/m_syswrap/syswrap-ppc32-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c +@@ -1028,6 +1028,8 @@ static SyscallTableEntry syscall_table[] = { + + LINXY(__NR_statx, sys_statx), // 383 + ++ GENX_(__NR_rseq, sys_ni_syscall), // 387 ++ + LINXY(__NR_clock_gettime64, sys_clock_gettime64), // 403 + LINX_(__NR_clock_settime64, sys_clock_settime64), // 404 + +diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c +index a26b41c32..93956d3cc 100644 +--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c +@@ -1019,6 +1019,8 @@ static SyscallTableEntry syscall_table[] = { + + LINXY(__NR_statx, sys_statx), // 383 + ++ GENX_(__NR_rseq, sys_ni_syscall), // 387 ++ + LINXY(__NR_io_uring_setup, sys_io_uring_setup), // 425 + LINXY(__NR_io_uring_enter, sys_io_uring_enter), // 426 + LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 +diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c +index 5c9209859..73f9684c4 100644 +--- a/coregrind/m_syswrap/syswrap-s390x-linux.c ++++ b/coregrind/m_syswrap/syswrap-s390x-linux.c +@@ -860,6 +860,8 @@ static SyscallTableEntry syscall_table[] = { + + LINXY(__NR_statx, sys_statx), // 379 + ++ GENX_(__NR_rseq, sys_ni_syscall), // 381 ++ + LINXY(__NR_io_uring_setup, sys_io_uring_setup), // 425 + LINXY(__NR_io_uring_enter, sys_io_uring_enter), // 426 + LINXY(__NR_io_uring_register, sys_io_uring_register), // 427 +diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c 
+index 1d8f45d33..8662ff501 100644 +--- a/coregrind/m_syswrap/syswrap-x86-linux.c ++++ b/coregrind/m_syswrap/syswrap-x86-linux.c +@@ -1619,6 +1619,8 @@ static SyscallTableEntry syscall_table[] = { + /* Explicitly not supported on i386 yet. */ + GENX_(__NR_arch_prctl, sys_ni_syscall), // 384 + ++ GENX_(__NR_rseq, sys_ni_syscall), // 386 ++ + LINXY(__NR_clock_gettime64, sys_clock_gettime64), // 403 + LINX_(__NR_clock_settime64, sys_clock_settime64), // 404 + +diff --git a/include/vki/vki-scnums-arm-linux.h b/include/vki/vki-scnums-arm-linux.h +index ff560e19d..485db8b26 100644 +--- a/include/vki/vki-scnums-arm-linux.h ++++ b/include/vki/vki-scnums-arm-linux.h +@@ -432,6 +432,7 @@ + #define __NR_pkey_alloc 395 + #define __NR_pkey_free 396 + #define __NR_statx 397 ++#define __NR_rseq 398 + + + +diff --git a/include/vki/vki-scnums-arm64-linux.h b/include/vki/vki-scnums-arm64-linux.h +index 9aa3b2b5f..acdfb39c6 100644 +--- a/include/vki/vki-scnums-arm64-linux.h ++++ b/include/vki/vki-scnums-arm64-linux.h +@@ -323,9 +323,11 @@ + #define __NR_pkey_alloc 289 + #define __NR_pkey_free 290 + #define __NR_statx 291 ++#define __NR_io_pgetevents 292 ++#define __NR_rseq 293 + + #undef __NR_syscalls +-#define __NR_syscalls 292 ++#define __NR_syscalls 294 + + ///* + // * All syscalls below here should go away really, +diff --git a/include/vki/vki-scnums-ppc32-linux.h b/include/vki/vki-scnums-ppc32-linux.h +index 6987ad941..08fa77df0 100644 +--- a/include/vki/vki-scnums-ppc32-linux.h ++++ b/include/vki/vki-scnums-ppc32-linux.h +@@ -415,6 +415,7 @@ + #define __NR_pkey_alloc 384 + #define __NR_pkey_free 385 + #define __NR_pkey_mprotect 386 ++#define __NR_rseq 387 + + #endif /* __VKI_SCNUMS_PPC32_LINUX_H */ + +diff --git a/include/vki/vki-scnums-ppc64-linux.h b/include/vki/vki-scnums-ppc64-linux.h +index 6827964fd..a76fa6d32 100644 +--- a/include/vki/vki-scnums-ppc64-linux.h ++++ b/include/vki/vki-scnums-ppc64-linux.h +@@ -407,6 +407,7 @@ + #define __NR_pkey_alloc 384 + #define __NR_pkey_free 385 + #define __NR_pkey_mprotect 386 ++#define __NR_rseq 387 + + #endif /* __VKI_SCNUMS_PPC64_LINUX_H */ + +diff --git a/include/vki/vki-scnums-s390x-linux.h b/include/vki/vki-scnums-s390x-linux.h +index 6487e20c9..869c04584 100644 +--- a/include/vki/vki-scnums-s390x-linux.h ++++ b/include/vki/vki-scnums-s390x-linux.h +@@ -342,8 +342,11 @@ + #define __NR_s390_guarded_storage 378 + #define __NR_statx 379 + #define __NR_s390_sthyi 380 ++#define __NR_kexec_file_load 381 ++#define __NR_io_pgetevents 382 ++#define __NR_rseq 383 + +-#define NR_syscalls 381 ++#define NR_syscalls 384 + + /* + * There are some system calls that are not present on 64 bit, some diff --git a/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch b/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch new file mode 100644 index 0000000..e48a106 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch @@ -0,0 +1,137 @@ +commit 4831385c6706b377851284adc4c4545fff4c6564 +Author: Nicholas Nethercote +Date: Tue Nov 9 12:30:07 2021 +1100 + + Fix Rust v0 demangling. + + It's currently broken due to a silly test that prevents the v0 + demangling code from even running. + + The commit also adds a test, to avoid such problems in the future.
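+
+    As an illustration (a minimal sketch, not part of this fix): the
+    corrected prefix test from the change below, applied to two of the
+    mangled names used in the new testcase.
+
+        #include <stdio.h>
+
+        /* "_Z" covers C++ and Rust "legacy" names; "_R" covers Rust "v0". */
+        static int try_demangle(const char *sym)
+        {
+            return sym != NULL && sym[0] == '_'
+                   && (sym[1] == 'Z' || sym[1] == 'R');
+        }
+
+        int main(void)
+        {
+            const char *syms[] = {
+                "_RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt",
+                "_ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE",
+                "main",
+            };
+            for (int i = 0; i < 3; i++)
+                printf("%-4s %s\n", try_demangle(syms[i]) ? "try" : "skip",
+                       syms[i]);
+            return 0;
+        }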
+ +diff --git a/coregrind/m_demangle/demangle.c b/coregrind/m_demangle/demangle.c +index 16161da2a..3fd7cb75f 100644 +--- a/coregrind/m_demangle/demangle.c ++++ b/coregrind/m_demangle/demangle.c +@@ -118,8 +118,13 @@ void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling, + } + + /* Possibly undo (1) */ ++ // - C++ mangled symbols start with "_Z" (possibly with exceptions?) ++ // - Rust "legacy" mangled symbols start with "_Z". ++ // - Rust "v0" mangled symbols start with "_R". ++ // XXX: the Java/Rust/Ada demangling here probably doesn't work. See ++ // https://bugs.kde.org/show_bug.cgi?id=445235 for details. + if (do_cxx_demangling && VG_(clo_demangle) +- && orig != NULL && orig[0] == '_' && orig[1] == 'Z') { ++ && orig != NULL && orig[0] == '_' && (orig[1] == 'Z' || orig[1] == 'R')) { + /* !!! vvv STATIC vvv !!! */ + static HChar* demangled = NULL; + /* !!! ^^^ STATIC ^^^ !!! */ +diff --git a/memcheck/tests/demangle-rust.c b/memcheck/tests/demangle-rust.c +new file mode 100644 +index 000000000..f2a458b2a +--- /dev/null ++++ b/memcheck/tests/demangle-rust.c +@@ -0,0 +1,31 @@ ++// Valgrind supports demangling Rust symbols (both the "v0" and "legacy" ++// mangling schemes), but we don't want to add a dependency on the Rust ++// compiler for a single test. So this is a C program with function names that ++// are mangled Rust symbols. In the output, they become demangled Rust names. ++// It's a hack, but a useful one. ++ ++#include ++ ++// A v0 symbol that demangles to: ::fold_with:: ++int _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(int *p) ++{ ++ return *p ? 1 : 2; ++} ++ ++// A v0 symbol that demangles to: rustc_expand::mbe::macro_parser::parse_tt ++int _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(int* p) ++{ ++ return _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(p); ++} ++ ++// A legacy symbol that demangles to: core::str::lossy::Utf8Lossy::from_bytes ++int _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(int* p) ++{ ++ return _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(p); ++} ++ ++int main(void) ++{ ++ return _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(malloc(sizeof(int))); ++} ++ +diff --git a/memcheck/tests/demangle-rust.stderr.exp b/memcheck/tests/demangle-rust.stderr.exp +new file mode 100644 +index 000000000..f04bb625b +--- /dev/null ++++ b/memcheck/tests/demangle-rust.stderr.exp +@@ -0,0 +1,6 @@ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: ::fold_with:: (demangle-rust.c:12) ++ by 0x........: rustc_expand::mbe::macro_parser::parse_tt (demangle-rust.c:18) ++ by 0x........: core::str::lossy::Utf8Lossy::from_bytes (demangle-rust.c:24) ++ by 0x........: main (demangle-rust.c:29) ++ +diff --git a/memcheck/tests/demangle-rust.vgtest b/memcheck/tests/demangle-rust.vgtest +new file mode 100644 +index 000000000..d726c6b2e +--- /dev/null ++++ b/memcheck/tests/demangle-rust.vgtest +@@ -0,0 +1,2 @@ ++prog: demangle-rust ++vgopts: -q + +commit c1bfa115f985633722f25922d2996c231e8c9d8d +Author: Mark Wielaard +Date: Wed Nov 10 09:02:36 2021 +0100 + + Add demangle-rust.vgtest demangle-rust.stderr.exp to EXTRA_DIST + +diff --git a/memcheck/tests/Makefile.am 
b/memcheck/tests/Makefile.am +index 4d0476e2d..7837d87c7 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -281,6 +281,7 @@ EXTRA_DIST = \ + realloc3.stderr.exp realloc3.vgtest \ + recursive-merge.stderr.exp recursive-merge.vgtest \ + resvn_stack.stderr.exp resvn_stack.vgtest \ ++ demangle-rust.vgtest demangle-rust.stderr.exp \ + sbfragment.stdout.exp sbfragment.stderr.exp sbfragment.vgtest \ + sem.stderr.exp sem.vgtest \ + sendmsg.stderr.exp sendmsg.stderr.exp-solaris sendmsg.vgtest \ + +commit d151907e5d8ff393f4fef126c8ae445ea8813661 +Author: Mark Wielaard +Date: Thu Nov 11 18:02:09 2021 +0100 + + Add demangle-rust to check_PROGRAMS + + The demangle-rust.vgtest would fail because the demangle-rust binary + wasn't build by default. Add it to check_PROGRAMS and define + demangle_rust_SOURCES to make sure it is always build. + +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 7837d87c7..449710020 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -392,6 +392,7 @@ check_PROGRAMS = \ + custom_alloc \ + custom-overlap \ + demangle \ ++ demangle-rust \ + big_debuginfo_symbol \ + deep-backtrace \ + describe-block \ +@@ -505,6 +506,7 @@ endif + leak_cpp_interior_SOURCES = leak_cpp_interior.cpp + + demangle_SOURCES = demangle.cpp ++demangle_rust_SOURCES = demangle-rust.c + + # Suppress various gcc warnings which are correct, but for things + # we are actually testing for at runtime. diff --git a/SOURCES/valgrind-3.18.1-s390x-EXRL.patch b/SOURCES/valgrind-3.18.1-s390x-EXRL.patch new file mode 100644 index 0000000..6927cc3 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-s390x-EXRL.patch @@ -0,0 +1,549 @@ +commit b77dbefe72e4a5c7bcf1576a02c909010bd56991 +Author: Andreas Arnez +Date: Fri Oct 22 19:55:12 2021 +0200 + + Bug 444242 - s390x: Sign-extend "relative long" offset in EXRL + + In s390_irgen_EXRL, the offset is zero-extended instead of sign-extended, + typically causing Valgrind to crash when a negative offset occurs. + + Fix this with a new helper function that calculates a "relative long" + address from a 32-bit offset. Replace other calculations of "relative + long" addresses by invocations of this function as well. And for + consistency, do the same with "relative" (short) addresses. + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 72222ab04..fffc563d4 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -399,6 +399,22 @@ mkF64i(ULong value) + return IRExpr_Const(IRConst_F64i(value)); + } + ++/* Return the 64-bit address with the given 32-bit "relative long" offset from ++ the current guest instruction being translated. */ ++static __inline__ Addr64 ++addr_rel_long(UInt offset) ++{ ++ return guest_IA_curr_instr + ((Addr64)(Long)(Int)offset << 1); ++} ++ ++/* Return the 64-bit address with the given 16-bit "relative" offset from the ++ current guest instruction being translated. */ ++static __inline__ Addr64 ++addr_relative(UShort offset) ++{ ++ return guest_IA_curr_instr + ((Addr64)(Long)(Short)offset << 1); ++} ++ + /* Little helper function for my sanity. 
ITE = if-then-else */ + static IRExpr * + mkite(IRExpr *condition, IRExpr *iftrue, IRExpr *iffalse) +@@ -5516,7 +5532,7 @@ static const HChar * + s390_irgen_BRAS(UChar r1, UShort i2) + { + put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 4ULL)); +- call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ call_function_and_chase(addr_relative(i2)); + + return "bras"; + } +@@ -5525,7 +5541,7 @@ static const HChar * + s390_irgen_BRASL(UChar r1, UInt i2) + { + put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 6ULL)); +- call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ call_function_and_chase(addr_rel_long(i2)); + + return "brasl"; + } +@@ -5538,12 +5554,11 @@ s390_irgen_BRC(UChar r1, UShort i2) + if (r1 == 0) { + } else { + if (r1 == 15) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ always_goto_and_chase(addr_relative(i2)); + } else { + assign(cond, s390_call_calculate_cond(r1)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + } + } +@@ -5561,11 +5576,11 @@ s390_irgen_BRCL(UChar r1, UInt i2) + if (r1 == 0) { + } else { + if (r1 == 15) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ always_goto_and_chase(addr_rel_long(i2)); + } else { + assign(cond, s390_call_calculate_cond(r1)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ addr_rel_long(i2)); + } + } + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) +@@ -5579,7 +5594,7 @@ s390_irgen_BRCT(UChar r1, UShort i2) + { + put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1))); + if_condition_goto(binop(Iop_CmpNE32, get_gpr_w1(r1), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brct"; + } +@@ -5589,7 +5604,7 @@ s390_irgen_BRCTH(UChar r1, UInt i2) + { + put_gpr_w0(r1, binop(Iop_Sub32, get_gpr_w0(r1), mkU32(1))); + if_condition_goto(binop(Iop_CmpNE32, get_gpr_w0(r1), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brcth"; + } +@@ -5599,7 +5614,7 @@ s390_irgen_BRCTG(UChar r1, UShort i2) + { + put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1))); + if_condition_goto(binop(Iop_CmpNE64, get_gpr_dw0(r1), mkU64(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brctg"; + } +@@ -5612,7 +5627,7 @@ s390_irgen_BRXH(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_w1(r3 | 1)); + put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3))); + if_condition_goto(binop(Iop_CmpLT32S, mkexpr(value), get_gpr_w1(r1)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxh"; + } +@@ -5625,7 +5640,7 @@ s390_irgen_BRXHG(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_dw0(r3 | 1)); + put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3))); + if_condition_goto(binop(Iop_CmpLT64S, mkexpr(value), get_gpr_dw0(r1)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxhg"; + } +@@ -5638,7 +5653,7 @@ s390_irgen_BRXLE(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_w1(r3 | 1)); + put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3))); + if_condition_goto(binop(Iop_CmpLE32S, get_gpr_w1(r1), mkexpr(value)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxle"; 
+ } +@@ -5651,7 +5666,7 @@ s390_irgen_BRXLG(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_dw0(r3 | 1)); + put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3))); + if_condition_goto(binop(Iop_CmpLE64S, get_gpr_dw0(r1), mkexpr(value)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxlg"; + } +@@ -5782,8 +5797,7 @@ s390_irgen_CRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "crl"; +@@ -5796,8 +5810,7 @@ s390_irgen_CGRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cgrl"; +@@ -5810,8 +5823,7 @@ s390_irgen_CGFRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cgfrl"; +@@ -5875,15 +5887,14 @@ s390_irgen_CRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + assign(op2, get_gpr_w1(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -5901,15 +5912,14 @@ s390_irgen_CGRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + assign(op2, get_gpr_dw0(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -5975,14 +5985,14 @@ s390_irgen_CIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + op2 = (Int)(Char)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, op1, + mktemp(Ity_I32, mkU32((UInt)op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6000,14 +6010,14 @@ s390_irgen_CGIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + op2 = (Long)(Char)i2; + assign(cond, s390_call_calculate_icc(m3, 
S390_CC_OP_SIGNED_COMPARE, op1, + mktemp(Ity_I64, mkU64((ULong)op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6131,8 +6141,7 @@ s390_irgen_CHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "chrl"; +@@ -6145,8 +6154,7 @@ s390_irgen_CGHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cghrl"; +@@ -6401,8 +6409,7 @@ s390_irgen_CLRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clrl"; +@@ -6415,8 +6422,7 @@ s390_irgen_CLGRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clgrl"; +@@ -6429,8 +6435,7 @@ s390_irgen_CLGFRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clgfrl"; +@@ -6443,8 +6448,7 @@ s390_irgen_CLHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clhrl"; +@@ -6457,8 +6461,7 @@ s390_irgen_CLGHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clghrl"; +@@ -6730,14 +6733,14 @@ s390_irgen_CLRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + assign(op2, get_gpr_w1(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6755,14 +6758,14 @@ s390_irgen_CLGRJ(UChar r1, UChar r2, UShort i4, UChar 
m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + assign(op2, get_gpr_dw0(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6828,14 +6831,14 @@ s390_irgen_CLIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + op2 = (UInt)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1, + mktemp(Ity_I32, mkU32(op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6853,14 +6856,14 @@ s390_irgen_CLGIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + op2 = (ULong)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1, + mktemp(Ity_I64, mkU64(op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -7539,8 +7542,7 @@ s390_irgen_LGFI(UChar r1, UInt i2) + static const HChar * + s390_irgen_LRL(UChar r1, UInt i2) + { +- put_gpr_w1(r1, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ put_gpr_w1(r1, load(Ity_I32, mkU64(addr_rel_long(i2)))); + + return "lrl"; + } +@@ -7548,8 +7550,7 @@ s390_irgen_LRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ put_gpr_dw0(r1, load(Ity_I64, mkU64(addr_rel_long(i2)))); + + return "lgrl"; + } +@@ -7557,8 +7558,7 @@ s390_irgen_LGRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGFRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + + return "lgfrl"; + } +@@ -7598,7 +7598,7 @@ s390_irgen_LAEY(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_LARL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1))); ++ put_gpr_dw0(r1, mkU64(addr_rel_long(i2))); + + return "larl"; + } +@@ -8038,8 +8038,7 @@ s390_irgen_LGHI(UChar r1, UShort i2) + static const HChar * + s390_irgen_LHRL(UChar r1, UInt i2) + { +- put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "lhrl"; + } +@@ -8047,8 +8046,7 @@ s390_irgen_LHRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGHRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "lghrl"; + } +@@ 
+@@ -8088,8 +8086,7 @@ s390_irgen_LLGF(UChar r1, IRTemp op2addr)
+ static const HChar *
+ s390_irgen_LLGFRL(UChar r1, UInt i2)
+ {
+- put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
+- ((ULong)(Long)(Int)i2 << 1)))));
++ put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2)))));
+
+ return "llgfrl";
+ }
+@@ -8169,8 +8166,7 @@ s390_irgen_LLGH(UChar r1, IRTemp op2addr)
+ static const HChar *
+ s390_irgen_LLHRL(UChar r1, UInt i2)
+ {
+- put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
+- ((ULong)(Long)(Int)i2 << 1)))));
++ put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2)))));
+
+ return "llhrl";
+ }
+@@ -8178,8 +8174,7 @@ s390_irgen_LLHRL(UChar r1, UInt i2)
+ static const HChar *
+ s390_irgen_LLGHRL(UChar r1, UInt i2)
+ {
+- put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
+- ((ULong)(Long)(Int)i2 << 1)))));
++ put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2)))));
+
+ return "llghrl";
+ }
+@@ -10064,8 +10059,7 @@ s390_irgen_STG(UChar r1, IRTemp op2addr)
+ static const HChar *
+ s390_irgen_STRL(UChar r1, UInt i2)
+ {
+- store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+- get_gpr_w1(r1));
++ store(mkU64(addr_rel_long(i2)), get_gpr_w1(r1));
+
+ return "strl";
+ }
+@@ -10073,8 +10067,7 @@ s390_irgen_STRL(UChar r1, UInt i2)
+ static const HChar *
+ s390_irgen_STGRL(UChar r1, UInt i2)
+ {
+- store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+- get_gpr_dw0(r1));
++ store(mkU64(addr_rel_long(i2)), get_gpr_dw0(r1));
+
+ return "stgrl";
+ }
+@@ -10203,8 +10196,7 @@ s390_irgen_STHY(UChar r1, IRTemp op2addr)
+ static const HChar *
+ s390_irgen_STHRL(UChar r1, UInt i2)
+ {
+- store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+- get_gpr_hw3(r1));
++ store(mkU64(addr_rel_long(i2)), get_gpr_hw3(r1));
+
+ return "sthrl";
+ }
+@@ -13282,7 +13274,7 @@ static const HChar *
+ s390_irgen_EXRL(UChar r1, UInt offset)
+ {
+ IRTemp addr = newTemp(Ity_I64);
+- Addr64 bytes_addr = guest_IA_curr_instr + offset * 2UL;
++ Addr64 bytes_addr = addr_rel_long(offset);
+ UChar *bytes = (UChar *)(HWord)bytes_addr;
+ /* we might save one round trip because we know the target */
+ if (!last_execute_target)
+diff --git a/none/tests/s390x/exrl.c b/none/tests/s390x/exrl.c
+index 2c99602d8..e669e484f 100644
+--- a/none/tests/s390x/exrl.c
++++ b/none/tests/s390x/exrl.c
+@@ -54,6 +54,17 @@ int main(void)
+ printf("|\n");
+ printf("\n");
+
++ printf("------- EXRL with negative offset\n");
++ asm volatile( "j 2f\n\t"
++ "1:\n\t"
++ "mvc 2(1,%0),0(%0)\n\t"
++ "2:\n\t"
++ "lghi 1,8\n\t"
++ ".insn ril,0xc60000000000,1,1b\n\t" // exrl 1, 1b
++ : : "a" (target)
++ : "1", "2", "3", "4");
++ printf(" target = |%s|\n", target);
++
+ return 0;
+ }
+
+diff --git a/none/tests/s390x/exrl.stdout.exp b/none/tests/s390x/exrl.stdout.exp
+index 520919e92..30dcde829 100644
+--- a/none/tests/s390x/exrl.stdout.exp
++++ b/none/tests/s390x/exrl.stdout.exp
+@@ -11,3 +11,5 @@ after: target = |0123456789aXXXXX|
+ ------- EXRL to OR in the syscall number (writes out target)
+ target = |0123456789aXXXXX|
+
++------- EXRL with negative offset
++ target = |01010101010XXXXX|
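The new test places the executed instruction at label 1 before the EXRL itself, so the encoded relative-long offset is negative -- precisely the case the zero-extension bug broke. EXRL ORs the low byte of r1 (loaded with 8 here) into the length field of the target MVC, turning mvc 2(1,%0),0(%0) into a 9-byte overlapping copy, and since MVC moves strictly one byte at a time from left to right, the buffer's first two characters get smeared across it. A plain C model of that behaviour -- a model only, with the buffer's starting contents taken from the expected output of the earlier tests:

   #include <stdio.h>

   /* Byte-wise, strictly left-to-right copy, as MVC does it. */
   static void mvc(char *dst, const char *src, unsigned len)
   {
      for (unsigned i = 0; i < len; i++)
         dst[i] = src[i];   /* the overlap is deliberate */
   }

   int main(void)
   {
      char target[] = "0123456789aXXXXX";
      /* Length code 0 OR-ed with 8 gives 8, i.e. operand length 9. */
      mvc(target + 2, target, 9);
      printf(" target = |%s|\n", target);   /* |01010101010XXXXX| */
      return 0;
   }

Under the unfixed recompiler the negative offset would have sent bytes_addr far past the instruction instead of before it, so the wrong bytes would have been picked up as the execute target.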
diff --git a/SOURCES/valgrind-3.18.1-s390x-vdso.patch b/SOURCES/valgrind-3.18.1-s390x-vdso.patch
new file mode 100644
index 0000000..501fc56
--- /dev/null
+++ b/SOURCES/valgrind-3.18.1-s390x-vdso.patch
@@ -0,0 +1,28 @@
+commit 99bf5dabf7865aaea7f2192373633e026c6fb16e
+Author: Andreas Arnez
+Date: Thu Dec 9 15:27:41 2021 +0100
+
+ Bug 444481 - Don't unmap the vDSO on s390x
+
+ Newer Linux kernels on s390x may use the vDSO as a "trampoline" for
+ syscall restart. This means that the vDSO is no longer optional, and
+ unmapping it may lead to a segmentation fault when a system call restart
+ is performed.
+
+ So far Valgrind has been unmapping the vDSO on s390x. Just don't do this
+ anymore.
+
+diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
+index 7d02d5567..95508ad1e 100644
+--- a/coregrind/m_initimg/initimg-linux.c
++++ b/coregrind/m_initimg/initimg-linux.c
+@@ -892,7 +892,8 @@ Addr setup_client_stack( void* init_sp,
+ # if !defined(VGP_ppc32_linux) && !defined(VGP_ppc64be_linux) \
+ && !defined(VGP_ppc64le_linux) \
+ && !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) \
+- && !defined(VGP_nanomips_linux)
++ && !defined(VGP_nanomips_linux) \
++ && !defined(VGP_s390x_linux)
+ case AT_SYSINFO_EHDR: {
+ /* Trash this, because we don't reproduce it */
+ const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr);
diff --git a/SPECS/valgrind.spec b/SPECS/valgrind.spec
index fb30bc5..4838ada 100644
--- a/SPECS/valgrind.spec
+++ b/SPECS/valgrind.spec
@@ -3,7 +3,7 @@
Summary: Tool for finding memory management bugs in programs
Name: %{?scl_prefix}valgrind
Version: 3.18.1
-Release: 1%{?dist}
+Release: 6%{?dist}
Epoch: 1
License: GPLv2+
URL: http://www.valgrind.org/
@@ -85,6 +85,55 @@ Patch4: valgrind-3.16.0-some-stack-protector.patch
# Add some -Wl,z,now.
Patch5: valgrind-3.16.0-some-Wl-z-now.patch
+# KDE#444495 dhat/tests/copy fails on s390x
+Patch6: valgrind-3.18.1-dhat-tests-copy.patch
+
+# KDE#444242 s390x: Sign-extend "relative long" offset in EXRL
+Patch7: valgrind-3.18.1-s390x-EXRL.patch
+
+# KDE#444571 - PPC, fix lxsibzx and lxsihzx
+Patch8: valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch
+
+# commit ae8c6de01417023e78763de145b1c0e6ddd87277
+# commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c
+# Fix for the prefixed stq instruction in PC relative mode.
+# KDE#444836 pstq instruction for R=1 is not storing to the correct address
+Patch9: valgrind-3.18.1-ppc-pstq.patch
+Patch10: valgrind-3.18.1-ppc-pstq-tests.patch
+
+# commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec
+# gdbserver_tests: Filter out glibc hwcaps libc.so
+Patch11: valgrind-3.18.1-gdbserver_tests-hwcap.patch
+
+# KDE#445184 Rust v0 symbol demangling is broken
+Patch12: valgrind-3.18.1-rust-v0-demangle.patch
+
+# KDE#445354 arm64 backend: incorrect code emitted for doubleword CAS
+Patch13: valgrind-3.18.1-arm64-doubleword-cas.patch
+
+# KDE#444399 arm64: unhandled instruction LD{,A}XP and ST{,L}XP
+Patch14: valgrind-3.18.1-arm64-ldaxp-stlxp.patch
+
+# KDE#445415 arm64 front end: alignment checks missing for atomic instructions.
+Patch15: valgrind-3.18.1-arm64-atomic-align.patch + +# commit 595341b150312d2407bd43304449bf39ec3e1fa8 +# amd64 front end: add more spec rules +Patch16: valgrind-3.18.1-amd64-more-spec-rules.patch + +# KDE#445504 Using C++ condition_variable results in bogus +# "mutex is locked simultaneously by two threads" warning +Patch17: valgrind-3.18.1-condvar.patch + +# KDE#445668 Inline stack frame generation is broken for Rust binaries +Patch18: valgrind-3.18.1-demangle-namespace.patch + +# KDE#405377 Handle new Linux kernel feature: Restartable Sequences ("rseq") +Patch19: valgrind-3.18.1-rseq-enosys.patch + +# KDE#444481 gdb_server test failures on s390x +Patch20: valgrind-3.18.1-s390x-vdso.patch + BuildRequires: make BuildRequires: glibc-devel @@ -222,6 +271,22 @@ Valgrind User Manual for details. %patch5 -p1 %endif +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 + %build # LTO triggers undefined symbols in valgrind. Valgrind has a --enable-lto # configure time option, but that doesn't seem to help. @@ -450,6 +515,25 @@ fi %endif %changelog +* Tue Dec 14 2021 Mark Wielaard - 3.18.1-6 +- Add valgrind-3.18.1-rseq-enosys.patch +- Add valgrind-3.18.1-s390x-vdso.patch + +* Wed Nov 24 2021 Mark Wielaard - 3.18.1-5 +- Add valgrind-3.18.1-dhat-tests-copy.patch +- Add valgrind-3.18.1-s390x-EXRL.patch +- Add valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch +- Add valgrind-3.18.1-ppc-pstq.patch +- Add valgrind-3.18.1-ppc-pstq-tests.patch +- Add valgrind-3.18.1-gdbserver_tests-hwcap.patch +- Add valgrind-3.18.1-rust-v0-demangle.patch +- Add valgrind-3.18.1-arm64-doubleword-cas.patch +- Add valgrind-3.18.1-arm64-ldaxp-stlxp.patch +- Add valgrind-3.18.1-arm64-atomic-align.patch +- Add valgrind-3.18.1-amd64-more-spec-rules.patch +- Add valgrind-3.18.1-condvar.patch +- Add valgrind-3.18.1-demangle-namespace.patch + * Thu Oct 21 2021 Mark Wielaard - 3.18.1-1 - Update to upstream 3.18.1 final
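For background on the s390x vDSO change above: AT_SYSINFO_EHDR is the auxiliary-vector entry through which the kernel tells userspace where the vDSO image is mapped, and the patched guard in setup_client_stack() now leaves that entry (and the mapping behind it) alone on s390x. Outside Valgrind the same entry can be inspected with glibc's getauxval(); a minimal probe, using only standard Linux interfaces:

   #include <stdio.h>
   #include <sys/auxv.h>

   int main(void)
   {
      /* Address of the vDSO's ELF header, as supplied by the kernel. */
      unsigned long vdso = getauxval(AT_SYSINFO_EHDR);
      printf("vDSO ELF header at %#lx\n", vdso);
      return 0;
   }

With the patch applied, a client running under Valgrind on s390x keeps its vDSO mapped, so a kernel that restarts a system call through the vDSO trampoline no longer faults.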